src/mdlib/genborn.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2008, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  34  */
  35
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <math.h>
  41 #include <string.h>
  42
  43 #include "typedefs.h"
  44 #include "smalloc.h"
  45 #include "genborn.h"
  46 #include "vec.h"
  47 #include "grompp.h"
  48 #include "pdbio.h"
  49 #include "names.h"
  50 #include "physics.h"
  51 #include "partdec.h"
  52 #include "domdec.h"
  53 #include "network.h"
  54 #include "gmx_fatal.h"
  55 #include "mtop_util.h"
  56 #include "pbc.h"
  57 #include "nrnb.h"
  58 #include "bondf.h"
  59
  60 #ifdef GMX_LIB_MPI
  61 #include <mpi.h>
  62 #endif
  63 #ifdef GMX_THREADS
  64 #include "tmpi.h"
  65 #endif
  66
  67 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_X86_64_SSE2) )
  68 #ifdef GMX_DOUBLE
  69 #include "genborn_sse2_double.h"
  70 #else
  71 #include "genborn_sse2_single.h"
  72 #include "genborn_allvsall_sse2_single.h"
  73 #endif /* GMX_DOUBLE */
  74 #endif /* GMX_SSE */
  75
  76 #include "genborn_allvsall.h"
  77
  78     //#define DISABLE_SSE
  79
  80 typedef struct {        /* One temporary GB neighbour-list entry (role inferred from the type name - TODO confirm against users) */
  81     int shift;          /* presumably a shift-vector index for this entry - verify against the code that fills it */
  82     int naj;            /* presumably the number of entries currently used in aj - verify */
  83     int *aj;            /* presumably the j-atom indices of this entry - verify */
  84     int aj_nalloc;      /* presumably the allocated length of aj - verify */
  85 } gbtmpnbl_t;
  86
  87 typedef struct gbtmpnbls {   /* A growable collection of gbtmpnbl_t entries */
  88     int nlist;               /* presumably the number of entries currently used in list - verify */
  89     gbtmpnbl_t *list;        /* array of temporary neighbour-list entries */
  90     int list_nalloc;         /* presumably the allocated length of list - verify */
  91 } t_gbtmpnbls;
  92
  93 /* This function is exactly the same as the one in bondfree.c. The reason
  94  * it is copied here is that the bonded gb-interactions are evaluated
  95  * not in calc_bonds, but rather in calc_gb_forces
  96  */
  97 static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
  98 {
  99         if (pbc) {
 100                 return pbc_dx_aiuc(pbc,xi,xj,dx);
 101         }
 102         else {
 103                 rvec_sub(xi,xj,dx);
 104                 return CENTRAL;
 105         }
 106 }
 107
 108 int init_gb_nblist(int natoms, t_nblist *nl)
 109 {
 110     nl->maxnri      = natoms*4;
 111     nl->maxnrj      = 0;
 112     nl->maxlen      = 0;
 113     nl->nri         = 0;
 114     nl->nrj         = 0;
 115     nl->iinr        = NULL;
 116     nl->gid         = NULL;
 117     nl->shift       = NULL;
 118     nl->jindex      = NULL;
 119     nl->jjnr        = NULL;
 120     /*nl->nltype      = nltype;*/
 121
 122     srenew(nl->iinr,   nl->maxnri);
 123     srenew(nl->gid,    nl->maxnri);
 124     srenew(nl->shift,  nl->maxnri);
 125     srenew(nl->jindex, nl->maxnri+1);
 126
 127     nl->jindex[0] = 0;
 128
 129     return 0;
 130 }
 131
 132 int print_nblist(int natoms, t_nblist *nl)
 133 {
 134     int i,k,ai,aj,nj0,nj1;
 135
 136     printf("genborn.c: print_nblist, natoms=%d\n",nl->nri);
 137
 138     for(i=0;i<nl->nri;i++)
 139     {
 140         ai=nl->iinr[i];
 141         nj0=nl->jindex[i];
 142         nj1=nl->jindex[i+1];
 143
 144         for(k=nj0;k<nj1;k++)
 145         {
 146             aj=nl->jjnr[k];
 147             printf("ai=%d, aj=%d\n",ai,aj);
 148         }
 149     }
 150
 151     return 0;
 152 }
 153
 154 typedef union {     /* Overlays a real with an int; fill_log_table() writes bit patterns through exp and reads them back through numlog */
 155     real numlog;    /* the stored bits viewed as a floating-point value (argument of log in fill_log_table) */
 156     int exp;        /* the same storage viewed as an integer; NOTE(review): only lines up with numlog if real and int have the same size - confirm for double-precision builds */
 157 } u_table;
 158
 159 void fill_log_table(const int n, real *table)
 160 {
 161     u_table log_table;
 162     real logfactor;
 163     int i;
 164
 165     int incr = 1 << (23-n);
 166     int p=pow(2,n);
 167
 168     logfactor = 1.0/log(2.0);
 169
 170     log_table.exp = 0x3F800000;
 171
 172     for(i=0;i<p;++i)
 173     {
 174         /* log2(numlog)=log(numlog)/log(2.0) */
 175         table[i]=log(log_table.numlog)*logfactor;
 176         log_table.exp+=incr;
 177     }
 178 }
 179
 180
 181 real table_log(real val, const real *table, const int n)
 182 {
 183     int *const exp_ptr = ((int*)&val);
 184     int x              = *exp_ptr;
 185     const int log_2    = ((x>>23) & 255) - 127;
 186     x &= 0x7FFFFF;
 187     x = x >> (23-n);
 188     val = table[x];
 189     return ((val+log_2)*0.69314718);
 190 }
 191
 192 void gb_pd_send(t_commrec *cr, real *send_data, int nr)
 193 {
 194 #ifdef GMX_MPI
 195     int i,cur;
 196     int *index,*sendc,*disp;
 197
 198     snew(sendc,cr->nnodes);
 199     snew(disp,cr->nnodes);
 200
 201     index = pd_index(cr);
 202     cur   = cr->nodeid;
 203
 204     /* Setup count/index arrays */
 205     for(i=0;i<cr->nnodes;i++)
 206     {
 207         sendc[i]  = index[i+1]-index[i];
 208         disp[i]   = index[i];
 209     }
 210
 211     /* Do communication */
 212     MPI_Gatherv(send_data+index[cur],sendc[cur],GMX_MPI_REAL,send_data,sendc,
 213                 disp,GMX_MPI_REAL,0,cr->mpi_comm_mygroup);
 214     MPI_Bcast(send_data,nr,GMX_MPI_REAL,0,cr->mpi_comm_mygroup);
 215
 216 #endif
 217 }
 218
 219
 220 int init_gb_plist(t_params *p_list)
 221 {
 222     p_list->nr    = 0;
 223     p_list->param = NULL;
 224
 225     return 0;
 226 }
 227
 228
 229
 230 int init_gb_still(const t_commrec *cr, t_forcerec  *fr,
 231                   const t_atomtypes *atype, t_idef *idef, t_atoms *atoms,
 232                   gmx_genborn_t *born,int natoms)
 233 {
 234
 235     int i,j,i1,i2,k,m,nbond,nang,ia,ib,ic,id,nb,idx,idx2,at;
 236     int iam,ibm;
 237     int at0,at1;
 238     real length,angle;
 239     real r,ri,rj,ri2,ri3,rj2,r2,r3,r4,rk,ratio,term,h,doffset;
 240     real p1,p2,p3,factor,cosine,rab,rbc;
 241
 242     real *vsol;
 243     real *gp;
 244
 245     snew(vsol,natoms);
 246     snew(gp,natoms);
 247     snew(born->gpol_still_work,natoms+3);
 248
 249     if(PAR(cr))
 250     {
 251         if(PARTDECOMP(cr))
 252         {
 253             pd_at_range(cr,&at0,&at1);
 254
 255             for(i=0;i<natoms;i++)
 256             {
 257                 vsol[i] = gp[i] = 0;
 258             }
 259         }
 260         else
 261         {
 262             at0 = 0;
 263             at1 = natoms;
 264         }
 265     }
 266     else
 267     {
 268         at0 = 0;
 269         at1 = natoms;
 270     }
 271
 272     doffset = born->gb_doffset;
 273
 274     for(i=0;i<natoms;i++)
 275     {
 276         born->gpol_globalindex[i]=born->vsolv_globalindex[i]=
 277             born->gb_radius_globalindex[i]=0;
 278     }
 279
 280     /* Compute atomic solvation volumes for Still method */
 281     for(i=0;i<natoms;i++)
 282     {
 283         ri=atype->gb_radius[atoms->atom[i].type];
 284         born->gb_radius_globalindex[i] = ri;
 285         r3=ri*ri*ri;
 286         born->vsolv_globalindex[i]=(4*M_PI/3)*r3;
 287     }
 288
 289     for(j=0;j<idef->il[F_GB12].nr;j+=3)
 290     {
 291         m=idef->il[F_GB12].iatoms[j];
 292         ia=idef->il[F_GB12].iatoms[j+1];
 293         ib=idef->il[F_GB12].iatoms[j+2];
 294
 295         r=1.01*idef->iparams[m].gb.st;
 296
 297         ri   = atype->gb_radius[atoms->atom[ia].type];
 298         rj   = atype->gb_radius[atoms->atom[ib].type];
 299
 300         ri2  = ri*ri;
 301         ri3  = ri2*ri;
 302         rj2  = rj*rj;
 303
 304         ratio  = (rj2-ri2-r*r)/(2*ri*r);
 305         h      = ri*(1+ratio);
 306         term   = (M_PI/3.0)*h*h*(3.0*ri-h);
 307
 308         if(PARTDECOMP(cr))
 309         {
 310             vsol[ia]+=term;
 311         }
 312         else
 313         {
 314             born->vsolv_globalindex[ia] -= term;
 315         }
 316
 317         ratio  = (ri2-rj2-r*r)/(2*rj*r);
 318         h      = rj*(1+ratio);
 319         term   = (M_PI/3.0)*h*h*(3.0*rj-h);
 320
 321         if(PARTDECOMP(cr))
 322         {
 323             vsol[ib]+=term;
 324         }
 325         else
 326         {
 327             born->vsolv_globalindex[ib] -= term;
 328         }
 329     }
 330
 331     if(PARTDECOMP(cr))
 332     {
 333         gmx_sum(natoms,vsol,cr);
 334
 335         for(i=0;i<natoms;i++)
 336         {
 337             born->vsolv_globalindex[i]=born->vsolv_globalindex[i]-vsol[i];
 338         }
 339     }
 340
 341     /* Get the self-, 1-2 and 1-3 polarization energies for analytical Still
 342        method */
 343     /* Self */
 344     for(j=0;j<natoms;j++)
 345     {
 346         if(born->use_globalindex[j]==1)
 347         {
 348             born->gpol_globalindex[j]=-0.5*ONE_4PI_EPS0/
 349                 (atype->gb_radius[atoms->atom[j].type]-doffset+STILL_P1);
 350         }
 351     }
 352
 353     /* 1-2 */
 354     for(j=0;j<idef->il[F_GB12].nr;j+=3)
 355     {
 356         m=idef->il[F_GB12].iatoms[j];
 357         ia=idef->il[F_GB12].iatoms[j+1];
 358         ib=idef->il[F_GB12].iatoms[j+2];
 359
 360         r=idef->iparams[m].gb.st;
 361
 362         r4=r*r*r*r;
 363
 364         if(PARTDECOMP(cr))
 365         {
 366             gp[ia]+=STILL_P2*born->vsolv_globalindex[ib]/r4;
 367             gp[ib]+=STILL_P2*born->vsolv_globalindex[ia]/r4;
 368         }
 369         else
 370         {
 371             born->gpol_globalindex[ia]=born->gpol_globalindex[ia]+
 372                 STILL_P2*born->vsolv_globalindex[ib]/r4;
 373             born->gpol_globalindex[ib]=born->gpol_globalindex[ib]+
 374                 STILL_P2*born->vsolv_globalindex[ia]/r4;
 375         }
 376     }
 377
 378     /* 1-3 */
 379     for(j=0;j<idef->il[F_GB13].nr;j+=3)
 380     {
 381         m=idef->il[F_GB13].iatoms[j];
 382         ia=idef->il[F_GB13].iatoms[j+1];
 383         ib=idef->il[F_GB13].iatoms[j+2];
 384
 385         r=idef->iparams[m].gb.st;
 386         r4=r*r*r*r;
 387
 388         if(PARTDECOMP(cr))
 389         {
 390             gp[ia]+=STILL_P3*born->vsolv[ib]/r4;
 391             gp[ib]+=STILL_P3*born->vsolv[ia]/r4;
 392         }
 393         else
 394         {
 395             born->gpol_globalindex[ia]=born->gpol_globalindex[ia]+
 396                 STILL_P3*born->vsolv_globalindex[ib]/r4;
 397             born->gpol_globalindex[ib]=born->gpol_globalindex[ib]+
 398                 STILL_P3*born->vsolv_globalindex[ia]/r4;
 399         }
 400     }
 401
 402     if(PARTDECOMP(cr))
 403     {
 404         gmx_sum(natoms,gp,cr);
 405
 406         for(i=0;i<natoms;i++)
 407         {
 408             born->gpol_globalindex[i]=born->gpol_globalindex[i]+gp[i];
 409         }
 410     }
 411
 412     sfree(vsol);
 413     sfree(gp);
 414
 415     return 0;
 416 }
 417
 418
 419
 420 #define LOG_TABLE_ACCURACY 15 /* Accuracy of the table logarithm */
 421
 422
 423 /* Initialize all GB datastructs and compute polarization energies */
 424 int init_gb(gmx_genborn_t **p_born,
 425             const t_commrec *cr, t_forcerec *fr, const t_inputrec *ir,
 426             const gmx_mtop_t *mtop, real rgbradii, int gb_algorithm)
 427 {
 428     int i,j,m,ai,aj,jj,natoms,nalloc;
 429     real rai,sk,p,doffset;
 430
 431     t_atoms        atoms;
 432     gmx_genborn_t  *born;
 433     gmx_localtop_t *localtop;
 434
 435     natoms   = mtop->natoms;
 436
 437     atoms    = gmx_mtop_global_atoms(mtop);
 438     localtop = gmx_mtop_generate_local_top(mtop,ir);
 439
 440     snew(born,1);
 441     *p_born = born;
 442
 443         born->nr = fr->natoms_force;
 444     born->nr  = natoms;
 445
 446     snew(born->drobc, natoms);
 447     snew(born->bRad,  natoms);
 448
 449     /* Allocate memory for the global data arrays */
 450     snew(born->param_globalindex, natoms+3);
 451     snew(born->gpol_globalindex,  natoms+3);
 452     snew(born->vsolv_globalindex, natoms+3);
 453     snew(born->gb_radius_globalindex, natoms+3);
 454     snew(born->use_globalindex,    natoms+3);
 455
 456     snew(fr->invsqrta, natoms);
 457     snew(fr->dvda,     natoms);
 458
 459     fr->dadx              = NULL;
 460     fr->dadx_rawptr       = NULL;
 461     fr->nalloc_dadx       = 0;
 462     born->gpol_still_work = NULL;
 463     born->gpol_hct_work   = NULL;
 464
 465     /* snew(born->asurf,natoms); */
 466     /* snew(born->dasurf,natoms); */
 467
 468     /* Initialize the gb neighbourlist */
 469     init_gb_nblist(natoms,&(fr->gblist));
 470
 471     /* Do the Vsites exclusions (if any) */
 472     for(i=0;i<natoms;i++)
 473     {
 474         jj = atoms.atom[i].type;
 475         if (mtop->atomtypes.gb_radius[atoms.atom[i].type] > 0)
 476         {
 477             born->use_globalindex[i] = 1;
 478         }
 479         else
 480         {
 481             born->use_globalindex[i] = 0;
 482         }
 483
 484         /* If we have a Vsite, put vs_globalindex[i]=0 */
 485         if (C6 (fr->nbfp,fr->ntype,jj,jj) == 0 &&
 486             C12(fr->nbfp,fr->ntype,jj,jj) == 0 &&
 487             atoms.atom[i].q == 0)
 488         {
 489             born->use_globalindex[i]=0;
 490         }
 491     }
 492
 493     /* Copy algorithm parameters from inputrecord to local structure */
 494     born->obc_alpha  = ir->gb_obc_alpha;
 495     born->obc_beta   = ir->gb_obc_beta;
 496     born->obc_gamma  = ir->gb_obc_gamma;
 497     born->gb_doffset = ir->gb_dielectric_offset;
 498     born->gb_epsilon_solvent = ir->gb_epsilon_solvent;
 499     born->epsilon_r = ir->epsilon_r;
 500
 501     doffset = born->gb_doffset;
 502
 503     /* If Still model, initialise the polarisation energies */
 504     if(gb_algorithm==egbSTILL)
 505     {
 506         init_gb_still(cr, fr,&(mtop->atomtypes), &(localtop->idef), &atoms,
 507                       born, natoms);
 508     }
 509
 510
 511     /* If HCT/OBC,  precalculate the sk*atype->S_hct factors */
 512     else if(gb_algorithm==egbHCT || gb_algorithm==egbOBC)
 513     {
 514
 515         snew(born->gpol_hct_work, natoms+3);
 516
 517         for(i=0;i<natoms;i++)
 518         {
 519             if(born->use_globalindex[i]==1)
 520             {
 521                 rai = mtop->atomtypes.gb_radius[atoms.atom[i].type]-doffset;
 522                 sk  = rai * mtop->atomtypes.S_hct[atoms.atom[i].type];
 523                 born->param_globalindex[i] = sk;
 524                 born->gb_radius_globalindex[i] = rai;
 525             }
 526             else
 527             {
 528                 born->param_globalindex[i] = 0;
 529                 born->gb_radius_globalindex[i] = 0;
 530             }
 531         }
 532     }
 533
 534     /* Init the logarithm table */
 535     p=pow(2,LOG_TABLE_ACCURACY);
 536     snew(born->log_table, p);
 537
 538     fill_log_table(LOG_TABLE_ACCURACY, born->log_table);
 539
 540     /* Allocate memory for work arrays for temporary use */
 541     snew(born->work,natoms+4);
 542     snew(born->count,natoms);
 543     snew(born->nblist_work,natoms);
 544
 545     /* Domain decomposition specific stuff */
 546     if(DOMAINDECOMP(cr))
 547     {
 548         snew(born->dd_work,natoms);
 549         born->nlocal = cr->dd->nat_tot; /* cr->dd->nat_tot will be zero here */
 550     }
 551
 552     return 0;
 553 }
 554
 555
 556
 557 static int
 558 calc_gb_rad_still(t_commrec *cr, t_forcerec *fr,int natoms, gmx_localtop_t *top,
 559                   const t_atomtypes *atype, rvec x[], t_nblist *nl,
 560                   gmx_genborn_t *born,t_mdatoms *md)
 561 {
 562     int i,k,n,nj0,nj1,ai,aj,type;
 563     int shift;
 564     real shX,shY,shZ;
 565     real gpi,dr,dr2,dr4,idr4,rvdw,ratio,ccf,theta,term,rai,raj;
 566     real ix1,iy1,iz1,jx1,jy1,jz1,dx11,dy11,dz11;
 567     real rinv,idr2,idr6,vaj,dccf,cosq,sinq,prod,gpi2;
 568     real factor;
 569     real vai, prod_ai, icf4,icf6;
 570
 571     factor  = 0.5*ONE_4PI_EPS0;
 572     n       = 0;
 573
 574     for(i=0;i<born->nr;i++)
 575     {
 576         born->gpol_still_work[i]=0;
 577     }
 578
 579         for(i=0;i<nl->nri;i++ )
 580     {
 581         ai      = nl->iinr[i];
 582
 583         nj0     = nl->jindex[i];
 584         nj1     = nl->jindex[i+1];
 585
 586         /* Load shifts for this list */
 587         shift   = nl->shift[i];
 588         shX     = fr->shift_vec[shift][0];
 589         shY     = fr->shift_vec[shift][1];
 590         shZ     = fr->shift_vec[shift][2];
 591
 592         gpi     = 0;
 593
 594         rai     = top->atomtypes.gb_radius[md->typeA[ai]];
 595         vai     = born->vsolv[ai];
 596         prod_ai = STILL_P4*vai;
 597
 598         /* Load atom i coordinates, add shift vectors */
 599         ix1     = shX + x[ai][0];
 600         iy1     = shY + x[ai][1];
 601         iz1     = shZ + x[ai][2];
 602
 603         for(k=nj0;k<nj1;k++)
 604         {
 605             aj    = nl->jjnr[k];
 606             jx1   = x[aj][0];
 607             jy1   = x[aj][1];
 608             jz1   = x[aj][2];
 609
 610             dx11  = ix1-jx1;
 611             dy11  = iy1-jy1;
 612             dz11  = iz1-jz1;
 613
 614             dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
 615             rinv  = gmx_invsqrt(dr2);
 616             idr2  = rinv*rinv;
 617             idr4  = idr2*idr2;
 618             idr6  = idr4*idr2;
 619
 620             raj = top->atomtypes.gb_radius[md->typeA[aj]];
 621
 622             rvdw  = rai + raj;
 623
 624             ratio = dr2 / (rvdw * rvdw);
 625             vaj   = born->vsolv[aj];
 626
 627             if(ratio>STILL_P5INV)
 628             {
 629                 ccf=1.0;
 630                 dccf=0.0;
 631             }
 632             else
 633             {
 634                 theta = ratio*STILL_PIP5;
 635                 cosq  = cos(theta);
 636                 term  = 0.5*(1.0-cosq);
 637                 ccf   = term*term;
 638                 sinq  = 1.0 - cosq*cosq;
 639                 dccf  = 2.0*term*sinq*gmx_invsqrt(sinq)*theta;
 640             }
 641
 642             prod          = STILL_P4*vaj;
 643             icf4          = ccf*idr4;
 644             icf6          = (4*ccf-dccf)*idr6;
 645
 646             born->gpol_still_work[aj] += prod_ai*icf4;
 647             gpi             = gpi+prod*icf4;
 648
 649             /* Save ai->aj and aj->ai chain rule terms */
 650             fr->dadx[n++]   = prod*icf6;
 651             fr->dadx[n++]   = prod_ai*icf6;
 652         }
 653         born->gpol_still_work[ai]+=gpi;
 654     }
 655
 656     /* Parallel summations */
 657     if(PARTDECOMP(cr))
 658     {
 659         gmx_sum(natoms, born->gpol_still_work, cr);
 660     }
 661     else if(DOMAINDECOMP(cr))
 662     {
 663         dd_atom_sum_real(cr->dd, born->gpol_still_work);
 664     }
 665
 666     /* Calculate the radii */
 667         for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
 668     {
 669                 if(born->use[i] != 0)
 670         {
 671
 672             gpi  = born->gpol[i]+born->gpol_still_work[i];
 673             gpi2 = gpi * gpi;
 674             born->bRad[i]   = factor*gmx_invsqrt(gpi2);
 675             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 676                 }
 677     }
 678
 679     /* Extra communication required for DD */
 680     if(DOMAINDECOMP(cr))
 681     {
 682         dd_atom_spread_real(cr->dd, born->bRad);
 683         dd_atom_spread_real(cr->dd, fr->invsqrta);
 684     }
 685
 686     return 0;
 687
 688 }
 689
 690
 691 static int
 692 calc_gb_rad_hct(t_commrec *cr,t_forcerec *fr,int natoms, gmx_localtop_t *top,
 693                 const t_atomtypes *atype, rvec x[], t_nblist *nl,
 694                 gmx_genborn_t *born,t_mdatoms *md)
 695 {
 696     int i,k,n,ai,aj,nj0,nj1,at0,at1;
 697     int shift;
 698     real shX,shY,shZ;
 699     real rai,raj,gpi,dr2,dr,sk,sk_ai,sk2,sk2_ai,lij,uij,diff2,tmp,sum_ai;
 700     real rad,min_rad,rinv,rai_inv;
 701     real ix1,iy1,iz1,jx1,jy1,jz1,dx11,dy11,dz11;
 702     real lij2, uij2, lij3, uij3, t1,t2,t3;
 703     real lij_inv,dlij,duij,sk2_rinv,prod,log_term;
 704     real doffset,raj_inv,dadx_val;
 705     real *gb_radius;
 706
 707     doffset = born->gb_doffset;
 708     gb_radius = born->gb_radius;
 709
 710     for(i=0;i<born->nr;i++)
 711     {
 712         born->gpol_hct_work[i] = 0;
 713     }
 714
 715     /* Keep the compiler happy */
 716     n    = 0;
 717     prod = 0;
 718
 719     for(i=0;i<nl->nri;i++)
 720     {
 721         ai     = nl->iinr[i];
 722
 723         nj0    = nl->jindex[ai];
 724         nj1    = nl->jindex[ai+1];
 725
 726         /* Load shifts for this list */
 727         shift   = nl->shift[i];
 728         shX     = fr->shift_vec[shift][0];
 729         shY     = fr->shift_vec[shift][1];
 730         shZ     = fr->shift_vec[shift][2];
 731
 732         rai     = gb_radius[ai];
 733         rai_inv = 1.0/rai;
 734
 735         sk_ai   = born->param[ai];
 736         sk2_ai  = sk_ai*sk_ai;
 737
 738         /* Load atom i coordinates, add shift vectors */
 739         ix1     = shX + x[ai][0];
 740         iy1     = shY + x[ai][1];
 741         iz1     = shZ + x[ai][2];
 742
 743         sum_ai  = 0;
 744
 745         for(k=nj0;k<nj1;k++)
 746         {
 747             aj    = nl->jjnr[k];
 748
 749             jx1   = x[aj][0];
 750             jy1   = x[aj][1];
 751             jz1   = x[aj][2];
 752
 753             dx11  = ix1 - jx1;
 754             dy11  = iy1 - jy1;
 755             dz11  = iz1 - jz1;
 756
 757             dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
 758             rinv  = gmx_invsqrt(dr2);
 759             dr    = rinv*dr2;
 760
 761             sk    = born->param[aj];
 762             raj   = gb_radius[aj];
 763
 764             /* aj -> ai interaction */
 765             if(rai < dr+sk)
 766             {
 767                 lij     = 1.0/(dr-sk);
 768                 dlij    = 1.0;
 769
 770                 if(rai>dr-sk)
 771                 {
 772                     lij  = rai_inv;
 773                     dlij = 0.0;
 774                 }
 775
 776                 lij2     = lij*lij;
 777                 lij3     = lij2*lij;
 778
 779                 uij      = 1.0/(dr+sk);
 780                 uij2     = uij*uij;
 781                 uij3     = uij2*uij;
 782
 783                 diff2    = uij2-lij2;
 784
 785                 lij_inv  = gmx_invsqrt(lij2);
 786                 sk2      = sk*sk;
 787                 sk2_rinv = sk2*rinv;
 788                 prod     = 0.25*sk2_rinv;
 789
 790                 /* log_term = table_log(uij*lij_inv,born->log_table,
 791                    LOG_TABLE_ACCURACY); */
 792                 log_term = log(uij*lij_inv);
 793
 794                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
 795                     prod*(-diff2);
 796
 797                 if(rai<sk-dr)
 798                 {
 799                     tmp = tmp + 2.0 * (rai_inv-lij);
 800                 }
 801
 802                 t1 = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 803                 t2 = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 804                 t3 = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 805
 806                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule */
 807                 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */
 808                 /* rb2 is moved to chainrule    */
 809
 810                 sum_ai += 0.5*tmp;
 811             }
 812             else
 813             {
 814                 dadx_val = 0.0;
 815             }
 816             fr->dadx[n++] = dadx_val;
 817
 818
 819             /* ai -> aj interaction */
 820             if(raj < dr + sk_ai)
 821             {
 822                 lij     = 1.0/(dr-sk_ai);
 823                 dlij    = 1.0;
 824                 raj_inv = 1.0/raj;
 825
 826                 if(raj>dr-sk_ai)
 827                 {
 828                     lij = raj_inv;
 829                     dlij = 0.0;
 830                 }
 831
 832                 lij2     = lij  * lij;
 833                 lij3     = lij2 * lij;
 834
 835                 uij      = 1.0/(dr+sk_ai);
 836                 uij2     = uij  * uij;
 837                 uij3     = uij2 * uij;
 838
 839                 diff2    = uij2-lij2;
 840
 841                 lij_inv  = gmx_invsqrt(lij2);
 842                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
 843                 sk2_rinv = sk2*rinv;
 844                 prod     = 0.25 * sk2_rinv;
 845
 846                 /* log_term = table_log(uij*lij_inv,born->log_table,
 847                    LOG_TABLE_ACCURACY); */
 848                 log_term = log(uij*lij_inv);
 849
 850                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term +
 851                            prod*(-diff2);
 852
 853                 if(raj<sk_ai-dr)
 854                 {
 855                     tmp     = tmp + 2.0 * (raj_inv-lij);
 856                 }
 857
 858                 /* duij = 1.0 */
 859                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
 860                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
 861                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
 862
 863                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
 864                 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ /* rb2 is moved to chainrule    */
 865
 866                 born->gpol_hct_work[aj] += 0.5*tmp;
 867             }
 868             else
 869             {
 870                 dadx_val = 0.0;
 871             }
 872             fr->dadx[n++] = dadx_val;
 873         }
 874
 875         born->gpol_hct_work[ai] += sum_ai;
 876     }
 877
 878     /* Parallel summations */
 879     if(PARTDECOMP(cr))
 880     {
 881         gmx_sum(natoms, born->gpol_hct_work, cr);
 882     }
 883     else if(DOMAINDECOMP(cr))
 884     {
 885         dd_atom_sum_real(cr->dd, born->gpol_hct_work);
 886     }
 887
 888     for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
 889     {
 890                 if(born->use[i] != 0)
 891         {
 892             rai     = top->atomtypes.gb_radius[md->typeA[i]]-doffset;
 893             sum_ai  = 1.0/rai - born->gpol_hct_work[i];
 894             min_rad = rai + doffset;
 895             rad     = 1.0/sum_ai;
 896
 897             born->bRad[i]   = rad > min_rad ? rad : min_rad;
 898             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
 899         }
 900     }
 901
 902     /* Extra communication required for DD */
 903     if(DOMAINDECOMP(cr))
 904     {
 905         dd_atom_spread_real(cr->dd, born->bRad);
 906         dd_atom_spread_real(cr->dd, fr->invsqrta);
 907     }
 908
 909
 910     return 0;
 911 }
 912
 913 static int
 914 calc_gb_rad_obc(t_commrec *cr, t_forcerec *fr, int natoms, gmx_localtop_t *top,
 915                     const t_atomtypes *atype, rvec x[], t_nblist *nl, gmx_genborn_t *born,t_mdatoms *md)
 916 {
 917     int i,k,ai,aj,nj0,nj1,n,at0,at1;
 918     int shift;
 919     real shX,shY,shZ;
 920     real rai,raj,gpi,dr2,dr,sk,sk2,lij,uij,diff2,tmp,sum_ai;
 921     real rad, min_rad,sum_ai2,sum_ai3,tsum,tchain,rinv,rai_inv,lij_inv,rai_inv2;
 922     real log_term,prod,sk2_rinv,sk_ai,sk2_ai;
 923     real ix1,iy1,iz1,jx1,jy1,jz1,dx11,dy11,dz11;
 924     real lij2,uij2,lij3,uij3,dlij,duij,t1,t2,t3;
 925     real doffset,raj_inv,dadx_val;
 926     real *gb_radius;
 927
 928     /* Keep the compiler happy */
 929     n    = 0;
 930     prod = 0;
 931     raj  = 0;
 932
 933     doffset = born->gb_doffset;
 934     gb_radius = born->gb_radius;
 935
 936     for(i=0;i<born->nr;i++)
 937     {
 938         born->gpol_hct_work[i] = 0;
 939     }
 940
 941     for(i=0;i<nl->nri;i++)
 942     {
 943         ai      = nl->iinr[i];
 944
 945         nj0     = nl->jindex[i];
 946         nj1     = nl->jindex[i+1];
 947
 948         /* Load shifts for this list */
 949         shift   = nl->shift[i];
 950         shX     = fr->shift_vec[shift][0];
 951         shY     = fr->shift_vec[shift][1];
 952         shZ     = fr->shift_vec[shift][2];
 953
 954         rai      = gb_radius[ai];
 955         rai_inv  = 1.0/rai;
 956
 957         sk_ai    = born->param[ai];
 958         sk2_ai   = sk_ai*sk_ai;
 959
 960         /* Load atom i coordinates, add shift vectors */
 961         ix1      = shX + x[ai][0];
 962         iy1      = shY + x[ai][1];
 963         iz1      = shZ + x[ai][2];
 964
 965         sum_ai   = 0;
 966
 967         for(k=nj0;k<nj1;k++)
 968         {
 969             aj    = nl->jjnr[k];
 970
 971             jx1   = x[aj][0];
 972             jy1   = x[aj][1];
 973             jz1   = x[aj][2];
 974
 975             dx11  = ix1 - jx1;
 976             dy11  = iy1 - jy1;
 977             dz11  = iz1 - jz1;
 978
 979             dr2   = dx11*dx11+dy11*dy11+dz11*dz11;
 980             rinv  = gmx_invsqrt(dr2);
 981             dr    = dr2*rinv;
 982
 983             /* sk is precalculated in init_gb() */
 984             sk    = born->param[aj];
 985             raj   = gb_radius[aj];
 986
 987             /* aj -> ai interaction */
 988             if(rai < dr+sk)
 989             {
 990                 lij       = 1.0/(dr-sk);
 991                 dlij      = 1.0;
 992
 993                 if(rai>dr-sk)
 994                 {
 995                     lij  = rai_inv;
 996                     dlij = 0.0;
 997                 }
 998
 999                 uij      = 1.0/(dr+sk);
1000                 lij2     = lij  * lij;
1001                 lij3     = lij2 * lij;
1002                 uij2     = uij  * uij;
1003                 uij3     = uij2 * uij;
1004
1005                 diff2    = uij2-lij2;
1006
1007                 lij_inv  = gmx_invsqrt(lij2);
1008                 sk2      = sk*sk;
1009                 sk2_rinv = sk2*rinv;
1010                 prod     = 0.25*sk2_rinv;
1011
1012                 log_term = log(uij*lij_inv);
1013
1014                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
1015                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
1016
1017                 if(rai < sk-dr)
1018                 {
1019                     tmp = tmp + 2.0 * (rai_inv-lij);
1020                 }
1021
1022                 /* duij    = 1.0; */
1023                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
1024                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
1025                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
1026
1027                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
1028
1029                 sum_ai += 0.5*tmp;
1030             }
1031             else
1032             {
1033                 dadx_val = 0.0;
1034             }
1035             fr->dadx[n++] = dadx_val;
1036
1037             /* ai -> aj interaction */
1038             if(raj < dr + sk_ai)
1039             {
1040                 lij     = 1.0/(dr-sk_ai);
1041                 dlij    = 1.0;
1042                 raj_inv = 1.0/raj;
1043
1044                 if(raj>dr-sk_ai)
1045                 {
1046                     lij = raj_inv;
1047                     dlij = 0.0;
1048                 }
1049
1050                 lij2     = lij  * lij;
1051                 lij3     = lij2 * lij;
1052
1053                 uij      = 1.0/(dr+sk_ai);
1054                 uij2     = uij  * uij;
1055                 uij3     = uij2 * uij;
1056
1057                 diff2    = uij2-lij2;
1058
1059                 lij_inv  = gmx_invsqrt(lij2);
1060                 sk2      =  sk2_ai; /* sk2_ai = sk_ai * sk_ai in i loop above */
1061                 sk2_rinv = sk2*rinv;
1062                 prod     = 0.25 * sk2_rinv;
1063
1064                 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
1065                 log_term = log(uij*lij_inv);
1066
1067                 tmp      = lij-uij + 0.25*dr*diff2 + (0.5*rinv)*log_term + prod*(-diff2);
1068
1069                 if(raj<sk_ai-dr)
1070                 {
1071                     tmp     = tmp + 2.0 * (raj_inv-lij);
1072                 }
1073
1074                 t1      = 0.5*lij2 + prod*lij3 - 0.25*(lij*rinv+lij3*dr);
1075                 t2      = -0.5*uij2 - 0.25*sk2_rinv*uij3 + 0.25*(uij*rinv+uij3*dr);
1076                 t3      = 0.125*(1.0+sk2_rinv*rinv)*(-diff2)+0.25*log_term*rinv*rinv;
1077
1078                 dadx_val = (dlij*t1+t2+t3)*rinv; /* rb2 is moved to chainrule    */
1079
1080                 born->gpol_hct_work[aj] += 0.5*tmp;
1081
1082             }
1083             else
1084             {
1085                 dadx_val = 0.0;
1086             }
1087             fr->dadx[n++] = dadx_val;
1088
1089         }
1090         born->gpol_hct_work[ai] += sum_ai;
1091
1092     }
1093
1094     /* Parallel summations */
1095     if(PARTDECOMP(cr))
1096     {
1097         gmx_sum(natoms, born->gpol_hct_work, cr);
1098     }
1099     else if(DOMAINDECOMP(cr))
1100     {
1101         dd_atom_sum_real(cr->dd, born->gpol_hct_work);
1102     }
1103
1104     for(i=0;i<fr->natoms_force;i++) /* PELA born->nr */
1105     {
1106                 if(born->use[i] != 0)
1107         {
1108             rai        = top->atomtypes.gb_radius[md->typeA[i]];
1109             rai_inv2   = 1.0/rai;
1110             rai        = rai-doffset;
1111             rai_inv    = 1.0/rai;
1112             sum_ai     = rai * born->gpol_hct_work[i];
1113             sum_ai2    = sum_ai  * sum_ai;
1114             sum_ai3    = sum_ai2 * sum_ai;
1115
1116             tsum    = tanh(born->obc_alpha*sum_ai-born->obc_beta*sum_ai2+born->obc_gamma*sum_ai3);
1117             born->bRad[i] = rai_inv - tsum*rai_inv2;
1118             born->bRad[i] = 1.0 / born->bRad[i];
1119
1120             fr->invsqrta[i] = gmx_invsqrt(born->bRad[i]);
1121
1122             tchain  = rai * (born->obc_alpha-2*born->obc_beta*sum_ai+3*born->obc_gamma*sum_ai2);
1123             born->drobc[i] = (1.0-tsum*tsum)*tchain*rai_inv2;
1124         }
1125     }
1126
1127     /* Extra (local) communication required for DD */
1128     if(DOMAINDECOMP(cr))
1129     {
1130         dd_atom_spread_real(cr->dd, born->bRad);
1131         dd_atom_spread_real(cr->dd, fr->invsqrta);
1132         dd_atom_spread_real(cr->dd, born->drobc);
1133     }
1134
1135     return 0;
1136
1137 }
1138
1139
1140
1141 int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir,gmx_localtop_t *top,
1142                 const t_atomtypes *atype, rvec x[], t_nblist *nl, gmx_genborn_t *born,t_mdatoms *md,t_nrnb     *nrnb)
1143 {
1144     real *p;
1145     int   cnt;
1146     int ndadx;
1147
1148     if(fr->bAllvsAll && fr->dadx==NULL)
1149     {
1150         /* We might need up to 8 atoms of padding before and after,
1151          * and another 4 units to guarantee SSE alignment.
1152          */
1153         fr->nalloc_dadx = 2*(md->homenr+12)*(md->nr/2+1+12);
1154         snew(fr->dadx_rawptr,fr->nalloc_dadx);
1155         fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15)));
1156     }
1157     else
1158     {
1159         /* In the SSE-enabled gb-loops, when writing to dadx, we
1160          * always write 2*4 elements at a time, even in the case with only
1161          * 1-3 j particles, where we only really need to write 2*(1-3)
1162          * elements. This is because we want dadx to be aligned to a 16-
1163          * byte boundary, and being able to use _mm_store/load_ps
1164          */
1165         ndadx = 2 * (nl->nrj + 3*nl->nri);
1166
1167         /* First, reallocate the dadx array, we need 3 extra for SSE */
1168         if (ndadx + 3 > fr->nalloc_dadx)
1169         {
1170             fr->nalloc_dadx = over_alloc_large(ndadx) + 3;
1171             srenew(fr->dadx_rawptr,fr->nalloc_dadx);
1172             fr->dadx = (real *) (((size_t) fr->dadx_rawptr + 16) & (~((size_t) 15)));
1173         }
1174     }
1175
1176 #ifndef DOUBLE
1177     if(fr->bAllvsAll)
1178     {
1179         cnt = md->homenr*(md->nr/2+1);
1180
1181         if(ir->gb_algorithm==egbSTILL)
1182         {
1183 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
1184             genborn_allvsall_calc_still_radii_sse2_single(fr,md,born,top,x[0],cr,&fr->AllvsAll_workgb);
1185 #else
1186             genborn_allvsall_calc_still_radii(fr,md,born,top,x[0],cr,&fr->AllvsAll_workgb);
1187 #endif
1188             inc_nrnb(nrnb,eNR_BORN_AVA_RADII_STILL,cnt);
1189         }
1190         else if(ir->gb_algorithm==egbHCT || ir->gb_algorithm==egbOBC)
1191         {
1192 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
1193             genborn_allvsall_calc_hct_obc_radii_sse2_single(fr,md,born,ir->gb_algorithm,top,x[0],cr,&fr->AllvsAll_workgb);
1194 #else
1195             genborn_allvsall_calc_hct_obc_radii(fr,md,born,ir->gb_algorithm,top,x[0],cr,&fr->AllvsAll_workgb);
1196 #endif
1197             inc_nrnb(nrnb,eNR_BORN_AVA_RADII_HCT_OBC,cnt);
1198         }
1199         else
1200         {
1201             gmx_fatal(FARGS,"Bad gb algorithm for all-vs-all interactions");
1202         }
1203         inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,md->homenr);
1204
1205         return 0;
1206     }
1207 #endif
1208
1209     /* Switch for determining which algorithm to use for Born radii calculation */
1210 #ifdef GMX_DOUBLE
1211
1212 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) )
1213     /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1214     switch(ir->gb_algorithm)
1215     {
1216         case egbSTILL:
1217              calc_gb_rad_still_sse2_double(cr,fr,md->nr,top, atype, x[0], nl, born, md);
1218             break;
1219         case egbHCT:
1220              calc_gb_rad_hct_sse2_double(cr,fr,md->nr,top, atype, x[0], nl, born, md);
1221             break;
1222         case egbOBC:
1223             calc_gb_rad_obc_sse2_double(cr,fr,md->nr,top, atype, x[0], nl, born, md);
1224             break;
1225
1226         default:
1227             gmx_fatal(FARGS, "Unknown double precision sse-enabled algorithm for Born radii calculation: %d",ir->gb_algorithm);
1228     }
1229 #else
1230     switch(ir->gb_algorithm)
1231     {
1232         case egbSTILL:
1233             calc_gb_rad_still(cr,fr,born->nr,top,atype,x,nl,born,md);
1234             break;
1235         case egbHCT:
1236             calc_gb_rad_hct(cr,fr,born->nr,top,atype,x,nl,born,md);
1237             break;
1238         case egbOBC:
1239             calc_gb_rad_obc(cr,fr,born->nr,top,atype,x,nl,born,md);
1240             break;
1241
1242         default:
1243             gmx_fatal(FARGS, "Unknown double precision algorithm for Born radii calculation: %d",ir->gb_algorithm);
1244     }
1245
1246 #endif
1247
1248 #else
1249
1250 #if (!defined DISABLE_SSE && ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) ) )
1251     /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1252     switch(ir->gb_algorithm)
1253     {
1254         case egbSTILL:
1255             calc_gb_rad_still_sse(cr,fr,born->nr,top, atype, x[0], nl, born, md);
1256             break;
1257         case egbHCT:
1258         case egbOBC:
1259             calc_gb_rad_hct_obc_sse(cr,fr,born->nr,top, atype, x[0], nl, born, md, ir->gb_algorithm);
1260             break;
1261
1262         default:
1263             gmx_fatal(FARGS, "Unknown sse-enabled algorithm for Born radii calculation: %d",ir->gb_algorithm);
1264     }
1265
1266 #else
1267     switch(ir->gb_algorithm)
1268     {
1269         case egbSTILL:
1270             calc_gb_rad_still(cr,fr,born->nr,top,atype,x,nl,born,md);
1271             break;
1272         case egbHCT:
1273             calc_gb_rad_hct(cr,fr,born->nr,top,atype,x,nl,born,md);
1274             break;
1275         case egbOBC:
1276             calc_gb_rad_obc(cr,fr,born->nr,top,atype,x,nl,born,md);
1277             break;
1278
1279         default:
1280             gmx_fatal(FARGS, "Unknown algorithm for Born radii calculation: %d",ir->gb_algorithm);
1281     }
1282
1283 #endif /* Single precision sse */
1284
1285 #endif /* Double or single precision */
1286
1287     if(fr->bAllvsAll==FALSE)
1288     {
1289         switch(ir->gb_algorithm)
1290         {
1291             case egbSTILL:
1292                 inc_nrnb(nrnb,eNR_BORN_RADII_STILL,nl->nrj);
1293                 break;
1294             case egbHCT:
1295             case egbOBC:
1296                 inc_nrnb(nrnb,eNR_BORN_RADII_HCT_OBC,nl->nrj);
1297                 break;
1298
1299             default:
1300                 break;
1301         }
1302         inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,nl->nri);
1303     }
1304
1305     return 0;
1306 }
1307
1308
1309
1310 real gb_bonds_tab(rvec x[], rvec f[], rvec fshift[], real *charge, real *p_gbtabscale,
1311                   real *invsqrta, real *dvda, real *GBtab, t_idef *idef, real epsilon_r,
1312                   real gb_epsilon_solvent, real facel, const t_pbc *pbc, const t_graph *graph)
1313 {
1314     int i,j,n0,m,nnn,type,ai,aj;
1315         int ki;
1316
1317         real isai,isaj;
1318     real r,rsq11;
1319     real rinv11,iq;
1320     real isaprod,qq,gbscale,gbtabscale,Y,F,Geps,Heps2,Fp,VV,FF,rt,eps,eps2;
1321     real vgb,fgb,vcoul,fijC,dvdatmp,fscal,dvdaj;
1322     real vctot;
1323
1324         rvec dx;
1325         ivec dt;
1326
1327     t_iatom *forceatoms;
1328
1329     /* Scale the electrostatics by gb_epsilon_solvent */
1330     facel = facel * ((1.0/epsilon_r) - 1.0/gb_epsilon_solvent);
1331
1332     gbtabscale=*p_gbtabscale;
1333     vctot = 0.0;
1334
1335     for(j=F_GB12;j<=F_GB14;j++)
1336     {
1337         forceatoms = idef->il[j].iatoms;
1338
1339         for(i=0;i<idef->il[j].nr; )
1340         {
1341             /* To avoid reading in the interaction type, we just increment i to pass over
1342              * the types in the forceatoms array, this saves some memory accesses
1343              */
1344             i++;
1345             ai            = forceatoms[i++];
1346             aj            = forceatoms[i++];
1347
1348                         ki            = pbc_rvec_sub(pbc,x[ai],x[aj],dx);
1349                         rsq11         = iprod(dx,dx);
1350
1351                         isai          = invsqrta[ai];
1352                         iq            = (-1)*facel*charge[ai];
1353
1354             rinv11        = gmx_invsqrt(rsq11);
1355             isaj          = invsqrta[aj];
1356             isaprod       = isai*isaj;
1357             qq            = isaprod*iq*charge[aj];
1358             gbscale       = isaprod*gbtabscale;
1359             r             = rsq11*rinv11;
1360             rt            = r*gbscale;
1361             n0            = rt;
1362             eps           = rt-n0;
1363             eps2          = eps*eps;
1364             nnn           = 4*n0;
1365             Y             = GBtab[nnn];
1366             F             = GBtab[nnn+1];
1367             Geps          = eps*GBtab[nnn+2];
1368             Heps2         = eps2*GBtab[nnn+3];
1369             Fp            = F+Geps+Heps2;
1370             VV            = Y+eps*Fp;
1371             FF            = Fp+Geps+2.0*Heps2;
1372             vgb           = qq*VV;
1373             fijC          = qq*FF*gbscale;
1374             dvdatmp       = -(vgb+fijC*r)*0.5;
1375             dvda[aj]      = dvda[aj] + dvdatmp*isaj*isaj;
1376             dvda[ai]      = dvda[ai] + dvdatmp*isai*isai;
1377             vctot         = vctot + vgb;
1378             fgb           = -(fijC)*rinv11;
1379
1380                         if (graph) {
1381                                 ivec_sub(SHIFT_IVEC(graph,ai),SHIFT_IVEC(graph,aj),dt);
1382                                 ki=IVEC2IS(dt);
1383                         }
1384
1385                         for (m=0; (m<DIM); m++) {                       /*  15          */
1386                                 fscal=fgb*dx[m];
1387                                 f[ai][m]+=fscal;
1388                                 f[aj][m]-=fscal;
1389                                 fshift[ki][m]+=fscal;
1390                                 fshift[CENTRAL][m]-=fscal;
1391                         }
1392         }
1393     }
1394
1395     return vctot;
1396 }
1397
1398 real calc_gb_selfcorrections(t_commrec *cr, int natoms,
1399                  real *charge, gmx_genborn_t *born, real *dvda, t_mdatoms *md, double facel)
1400 {
1401     int i,ai,at0,at1;
1402     real rai,e,derb,q,q2,fi,rai_inv,vtot;
1403
1404     if(PARTDECOMP(cr))
1405     {
1406         pd_at_range(cr,&at0,&at1);
1407     }
1408     else if(DOMAINDECOMP(cr))
1409     {
1410         at0=0;
1411         at1=cr->dd->nat_home;
1412     }
1413     else
1414     {
1415         at0=0;
1416         at1=natoms;
1417
1418     }
1419
1420     /* Scale the electrostatics by gb_epsilon_solvent */
1421     facel = facel * ((1.0/born->epsilon_r) - 1.0/born->gb_epsilon_solvent);
1422
1423     vtot=0.0;
1424
1425     /* Apply self corrections */
1426     for(i=at0;i<at1;i++)
1427     {
1428         ai       = i;
1429
1430         if(born->use[ai]==1)
1431         {
1432             rai      = born->bRad[ai];
1433             rai_inv  = 1.0/rai;
1434             q        = charge[ai];
1435             q2       = q*q;
1436             fi       = facel*q2;
1437             e        = fi*rai_inv;
1438             derb     = 0.5*e*rai_inv*rai_inv;
1439             dvda[ai] += derb*rai;
1440             vtot     -= 0.5*e;
1441         }
1442     }
1443
1444    return vtot;
1445
1446 }
1447
1448 real calc_gb_nonpolar(t_commrec *cr, t_forcerec *fr,int natoms,gmx_genborn_t *born, gmx_localtop_t *top,
1449                       const t_atomtypes *atype, real *dvda,int gb_algorithm, t_mdatoms *md)
1450 {
1451     int ai,i,at0,at1;
1452     real e,es,rai,rbi,term,probe,tmp,factor;
1453     real rbi_inv,rbi_inv2;
1454
1455     /* To keep the compiler happy */
1456     factor=0;
1457
1458     if(PARTDECOMP(cr))
1459     {
1460         pd_at_range(cr,&at0,&at1);
1461     }
1462     else if(DOMAINDECOMP(cr))
1463     {
1464         at0 = 0;
1465         at1 = cr->dd->nat_home;
1466     }
1467     else
1468     {
1469         at0=0;
1470         at1=natoms;
1471     }
1472
1473     /* The surface area factor is 0.0049 for Still model, 0.0054 for HCT/OBC */
1474     if(gb_algorithm==egbSTILL)
1475     {
1476         factor=0.0049*100*CAL2JOULE;
1477     }
1478     else
1479     {
1480         factor=0.0054*100*CAL2JOULE;
1481     }
1482
1483     /* if(gb_algorithm==egbHCT || gb_algorithm==egbOBC) */
1484
1485     es    = 0;
1486     probe = 0.14;
1487     term  = M_PI*4;
1488
1489     for(i=at0;i<at1;i++)
1490     {
1491         ai        = i;
1492
1493         if(born->use[ai]==1)
1494         {
1495             rai          = top->atomtypes.gb_radius[md->typeA[ai]];
1496             rbi_inv   = fr->invsqrta[ai];
1497             rbi_inv2  = rbi_inv * rbi_inv;
1498             tmp       = (rai*rbi_inv2)*(rai*rbi_inv2);
1499             tmp       = tmp*tmp*tmp;
1500             e         = factor*term*(rai+probe)*(rai+probe)*tmp;
1501             dvda[ai]  = dvda[ai] - 6*e*rbi_inv2;
1502             es        = es + e;
1503         }
1504     }
1505
1506     return es;
1507 }
1508
1509
1510
1511 real calc_gb_chainrule(int natoms, t_nblist *nl, real *dadx, real *dvda, rvec x[], rvec t[], rvec fshift[],
1512                        rvec shift_vec[], int gb_algorithm, gmx_genborn_t *born, t_mdatoms *md)
1513 {
1514     int i,k,n,ai,aj,nj0,nj1,n0,n1;
1515     int shift;
1516     real shX,shY,shZ;
1517     real fgb,fij,rb2,rbi,fix1,fiy1,fiz1;
1518     real ix1,iy1,iz1,jx1,jy1,jz1,dx11,dy11,dz11,rsq11;
1519     real rinv11,tx,ty,tz,rbai,rbaj,fgb_ai;
1520     real *rb;
1521     volatile int idx;
1522
1523     n  = 0;
1524     rb = born->work;
1525
1526
1527     n0 = md->start;
1528     n1 = md->start+md->homenr+1+natoms/2;
1529
1530     if(gb_algorithm==egbSTILL)
1531     {
1532         for(i=n0;i<n1;i++)
1533         {
1534             k = i % natoms;
1535             rbi   = born->bRad[k];
1536             rb[k] = (2 * rbi * rbi * dvda[k])/ONE_4PI_EPS0;
1537         }
1538     }
1539     else if(gb_algorithm==egbHCT)
1540     {
1541         for(i=n0;i<n1;i++)
1542         {
1543             k = i % natoms;
1544             rbi   = born->bRad[k];
1545             rb[k] = rbi * rbi * dvda[k];
1546         }
1547     }
1548     else if(gb_algorithm==egbOBC)
1549     {
1550         for(i=n0;i<n1;i++)
1551         {
1552             k = i % natoms;
1553             rbi   = born->bRad[k];
1554             rb[k] = rbi * rbi * born->drobc[k] * dvda[k];
1555         }
1556     }
1557
1558     for(i=0;i<nl->nri;i++)
1559     {
1560         ai   = nl->iinr[i];
1561
1562         nj0     = nl->jindex[ai];
1563         nj1  = nl->jindex[ai+1];
1564
1565         /* Load shifts for this list */
1566         shift   = nl->shift[i];
1567         shX     = shift_vec[shift][0];
1568         shY     = shift_vec[shift][1];
1569         shZ     = shift_vec[shift][2];
1570
1571         /* Load atom i coordinates, add shift vectors */
1572         ix1  = shX + x[ai][0];
1573         iy1  = shY + x[ai][1];
1574         iz1  = shZ + x[ai][2];
1575
1576         fix1 = 0;
1577         fiy1 = 0;
1578         fiz1 = 0;
1579
1580         rbai = rb[ai];
1581
1582         for(k=nj0;k<nj1;k++)
1583         {
1584             aj = nl->jjnr[k];
1585
1586             jx1     = x[aj][0];
1587             jy1     = x[aj][1];
1588             jz1     = x[aj][2];
1589
1590             dx11    = ix1 - jx1;
1591             dy11    = iy1 - jy1;
1592             dz11    = iz1 - jz1;
1593
1594             rbaj    = rb[aj];
1595
1596             fgb     = rbai*dadx[n++];
1597             fgb_ai  = rbaj*dadx[n++];
1598
1599             /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1600             fgb     = fgb + fgb_ai;
1601
1602             tx      = fgb * dx11;
1603             ty      = fgb * dy11;
1604             tz      = fgb * dz11;
1605
1606             fix1    = fix1 + tx;
1607             fiy1    = fiy1 + ty;
1608             fiz1    = fiz1 + tz;
1609
1610             /* Update force on atom aj */
1611             t[aj][0] = t[aj][0] - tx;
1612             t[aj][1] = t[aj][1] - ty;
1613             t[aj][2] = t[aj][2] - tz;
1614         }
1615
1616         /* Update force and shift forces on atom ai */
1617         t[ai][0] = t[ai][0] + fix1;
1618         t[ai][1] = t[ai][1] + fiy1;
1619         t[ai][2] = t[ai][2] + fiz1;
1620
1621         fshift[shift][0] = fshift[shift][0] + fix1;
1622         fshift[shift][1] = fshift[shift][1] + fiy1;
1623         fshift[shift][2] = fshift[shift][2] + fiz1;
1624
1625     }
1626
1627     return 0;
1628 }
1629
1630
1631 real calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t *top, const t_atomtypes *atype,
1632                     rvec x[], rvec f[], t_forcerec *fr, t_idef *idef, int gb_algorithm, t_nrnb *nrnb, bool bRad,
1633                                         const t_pbc *pbc, const t_graph *graph)
1634 {
1635     real v=0;
1636     int  cnt;
1637
1638         /* PBC or not? */
1639         const t_pbc *pbc_null;
1640
1641         if (fr->bMolPBC)
1642                 pbc_null = pbc;
1643         else
1644                 pbc_null = NULL;
1645
1646
1647
1648     /* Do a simple ACE type approximation for the non-polar solvation */
1649     v += calc_gb_nonpolar(cr, fr,born->nr, born, top, atype, fr->dvda, gb_algorithm,md);
1650
1651     /* Calculate the bonded GB-interactions using either table or analytical formula */
1652 #ifdef GMX_DOUBLE
1653     v += gb_bonds_tab(x,f,fr->fshift, md->chargeA,&(fr->gbtabscale),
1654                       fr->invsqrta,fr->dvda,fr->gbtab.tab,idef,born->epsilon_r,born->gb_epsilon_solvent, fr->epsfac, pbc_null, graph);
1655 #else
1656 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) )   /*
1657     v += gb_bonds_analytic(x[0],f[0],md->chargeA,born->bRad,fr->dvda,idef,born->epsilon_r,born->gb_epsilon_solvent,fr->epsfac);
1658                                                                                   */
1659     v += gb_bonds_tab(x,f,fr->fshift, md->chargeA,&(fr->gbtabscale),
1660                       fr->invsqrta,fr->dvda,fr->gbtab.tab,idef,born->epsilon_r,born->gb_epsilon_solvent, fr->epsfac, pbc_null, graph);
1661
1662 #else
1663     v += gb_bonds_tab(x,f,fr->fshift, md->chargeA,&(fr->gbtabscale),
1664                       fr->invsqrta,fr->dvda,fr->gbtab.tab,idef,born->epsilon_r,born->gb_epsilon_solvent, fr->epsfac, pbc_null, graph);
1665 #endif
1666 #endif
1667
1668     /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */
1669     v += calc_gb_selfcorrections(cr,born->nr,md->chargeA, born, fr->dvda, md, fr->epsfac);
1670
1671     /* If parallel, sum the derivative of the potential w.r.t the born radii */
1672     if(PARTDECOMP(cr))
1673     {
1674         gmx_sum(md->nr,fr->dvda, cr);
1675     }
1676     else if(DOMAINDECOMP(cr))
1677     {
1678         dd_atom_sum_real(cr->dd,fr->dvda);
1679         dd_atom_spread_real(cr->dd,fr->dvda);
1680     }
1681
1682 #ifndef DOUBLE
1683     if(fr->bAllvsAll)
1684     {
1685 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
1686         genborn_allvsall_calc_chainrule_sse2_single(fr,md,born,x[0],f[0],gb_algorithm,fr->AllvsAll_workgb);
1687 #else
1688         genborn_allvsall_calc_chainrule(fr,md,born,x[0],f[0],gb_algorithm,fr->AllvsAll_workgb);
1689 #endif
1690         cnt = md->homenr*(md->nr/2+1);
1691         inc_nrnb(nrnb,eNR_BORN_AVA_CHAINRULE,cnt);
1692         inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,md->homenr);
1693         return v;
1694     }
1695 #endif
1696
1697 #ifdef GMX_DOUBLE
1698
1699 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) )
1700      calc_gb_chainrule_sse2_double(born->nr, &(fr->gblist), fr->dadx, fr->dvda,
1701                                    x[0], f[0], fr->fshift[0],  fr->shift_vec[0],
1702                                    gb_algorithm, born);
1703 #else
1704     calc_gb_chainrule(born->nr, &(fr->gblist), fr->dadx, fr->dvda,
1705                       x, f, fr->fshift, fr->shift_vec,
1706                       gb_algorithm, born, md);
1707 #endif
1708
1709 #else
1710
1711 #if (!defined DISABLE_SSE && ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) ))
1712     /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1713     calc_gb_chainrule_sse(born->nr, &(fr->gblist), fr->dadx, fr->dvda,
1714                           x[0], f[0], fr->fshift[0], fr->shift_vec[0],
1715                           gb_algorithm, born);
1716 #else
1717     /* Calculate the forces due to chain rule terms with non sse code */
1718     calc_gb_chainrule(born->nr, &(fr->gblist), fr->dadx, fr->dvda,
1719                       x, f, fr->fshift, fr->shift_vec,
1720                       gb_algorithm, born, md);
1721 #endif
1722 #endif
1723
1724     if(!fr->bAllvsAll)
1725     {
1726         inc_nrnb(nrnb,eNR_BORN_CHAINRULE,fr->gblist.nrj);
1727         inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,fr->gblist.nri);
1728
1729     }
1730
1731     return v;
1732
1733 }
1734
1735 static void add_j_to_gblist(gbtmpnbl_t *list,int aj)
1736 {
1737     if (list->naj >= list->aj_nalloc)
1738     {
1739         list->aj_nalloc = over_alloc_large(list->naj+1);
1740         srenew(list->aj,list->aj_nalloc);
1741     }
1742
1743     list->aj[list->naj++] = aj;
1744 }
1745
1746 static gbtmpnbl_t *find_gbtmplist(struct gbtmpnbls *lists,int shift)
1747 {
1748     int ind,i;
1749
1750     /* Search the list with the same shift, if there is one */
1751     ind = 0;
1752     while (ind < lists->nlist && shift != lists->list[ind].shift)
1753     {
1754         ind++;
1755     }
1756     if (ind == lists->nlist)
1757     {
1758         if (lists->nlist == lists->list_nalloc)
1759         {
1760             lists->list_nalloc++;
1761             srenew(lists->list,lists->list_nalloc);
1762             for(i=lists->nlist; i<lists->list_nalloc; i++)
1763             {
1764                 lists->list[i].aj        = NULL;
1765                 lists->list[i].aj_nalloc = 0;
1766             }
1767
1768         }
1769
1770         lists->list[lists->nlist].shift = shift;
1771         lists->list[lists->nlist].naj   = 0;
1772         lists->nlist++;
1773     }
1774
1775     return &lists->list[ind];
1776 }
1777
1778 static void add_bondeds_to_gblist(t_ilist *il,
1779                                   bool bMolPBC,t_pbc *pbc,t_graph *g,rvec *x,
1780                                   struct gbtmpnbls *nls)
1781 {
1782     int  ind,j,ai,aj,shift,found;
1783     rvec dx;
1784     ivec dt;
1785     gbtmpnbl_t *list;
1786
1787     shift = CENTRAL;
1788     for(ind=0; ind<il->nr; ind+=3)
1789     {
1790         ai = il->iatoms[ind+1];
1791         aj = il->iatoms[ind+2];
1792
1793         shift = CENTRAL;
1794         if (g != NULL)
1795         {
1796           rvec_sub(x[ai],x[aj],dx);
1797           ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
1798           shift = IVEC2IS(dt);
1799         }
1800         else if (bMolPBC)
1801         {
1802           shift = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
1803         }
1804
1805         /* Find the list for this shift or create one */
1806         list = find_gbtmplist(&nls[ai],shift);
1807
1808         found=0;
1809
1810         /* So that we do not add the same bond twice.
1811          * This happens with some constraints between 1-3 atoms
1812          * that are in the bond-list but should not be in the GB nb-list */
1813         for(j=0;j<list->naj;j++)
1814         {
1815             if (list->aj[j] == aj)
1816             {
1817                 found = 1;
1818             }
1819         }
1820
1821         if (found == 0)
1822         {
1823                         if(ai == aj)
1824                         {
1825                                 gmx_incons("ai == aj");
1826                         }
1827
1828             add_j_to_gblist(list,aj);
1829         }
1830     }
1831 }
1832
/* qsort() comparison function for int elements, ascending order.
 *
 * Returns a negative value, zero or a positive value when *a is smaller
 * than, equal to or larger than *b. The result is computed from two
 * comparisons instead of a subtraction, so it cannot overflow for operands
 * that are far apart (e.g. INT_MIN and INT_MAX).
 */
static int
compare_int (const void * a, const void * b)
{
    const int lhs = *(const int *) a;
    const int rhs = *(const int *) b;

    return (lhs > rhs) - (lhs < rhs);
}
1838
1839
1840
1841 int make_gb_nblist(t_commrec *cr, int gb_algorithm, real gbcut,
1842                    rvec x[], matrix box,
1843                    t_forcerec *fr, t_idef *idef, t_graph *graph, gmx_genborn_t *born)
1844 {
1845     int i,l,ii,j,k,n,nj0,nj1,ai,aj,at0,at1,found,shift,s;
1846     int apa;
1847     t_nblist *nblist;
1848     t_pbc pbc;
1849
1850     struct gbtmpnbls *nls;
1851     gbtmpnbl_t *list =NULL;
1852
1853     nls   = born->nblist_work;
1854
1855     for(i=0;i<born->nr;i++)
1856     {
1857         nls[i].nlist = 0;
1858     }
1859
1860     if (fr->bMolPBC)
1861     {
1862         set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box);
1863     }
1864
1865     switch (gb_algorithm)
1866     {
1867     case egbHCT:
1868     case egbOBC:
1869         /* Loop over 1-2, 1-3 and 1-4 interactions */
1870         for(j=F_GB12;j<=F_GB14;j++)
1871         {
1872             add_bondeds_to_gblist(&idef->il[j],fr->bMolPBC,&pbc,graph,x,nls);
1873         }
1874         break;
1875     case egbSTILL:
1876         /* Loop over 1-4 interactions */
1877         add_bondeds_to_gblist(&idef->il[F_GB14],fr->bMolPBC,&pbc,graph,x,nls);
1878         break;
1879     default:
1880         gmx_incons("Unknown GB algorithm");
1881     }
1882
1883     /* Loop over the VDWQQ and VDW nblists to set up the nonbonded part of the GB list */
1884     for(n=0; (n<fr->nnblists); n++)
1885     {
1886         for(i=0; (i<eNL_NR); i++)
1887         {
1888             nblist=&(fr->nblists[n].nlist_sr[i]);
1889
1890             if (nblist->nri > 0 && (i==eNL_VDWQQ || i==eNL_QQ))
1891             {
1892                 for(j=0;j<nblist->nri;j++)
1893                 {
1894                     ai    = nblist->iinr[j];
1895                     shift = nblist->shift[j];
1896
1897                     /* Find the list for this shift or create one */
1898                     list = find_gbtmplist(&nls[ai],shift);
1899
1900                     nj0 = nblist->jindex[j];
1901                     nj1 = nblist->jindex[j+1];
1902
1903                     /* Add all the j-atoms in the non-bonded list to the GB list */
1904                     for(k=nj0;k<nj1;k++)
1905                     {
1906                         add_j_to_gblist(list,nblist->jjnr[k]);
1907                     }
1908                 }
1909             }
1910         }
1911     }
1912
1913     /* Zero out some counters */
1914         fr->gblist.nri=0;
1915     fr->gblist.nrj=0;
1916
1917         fr->gblist.jindex[0] = fr->gblist.nri;
1918
1919         for(i=0;i<fr->natoms_force;i++)
1920     {
1921         for(s=0; s<nls[i].nlist; s++)
1922         {
1923             list = &nls[i].list[s];
1924
1925             /* Only add those atoms that actually have neighbours */
1926             if (born->use[i] != 0)
1927             {
1928                 fr->gblist.iinr[fr->gblist.nri]  = i;
1929                 fr->gblist.shift[fr->gblist.nri] = list->shift;
1930                 fr->gblist.nri++;
1931
1932                 for(k=0; k<list->naj; k++)
1933                 {
1934                     /* Memory allocation for jjnr */
1935                     if(fr->gblist.nrj >= fr->gblist.maxnrj)
1936                     {
1937                         fr->gblist.maxnrj += over_alloc_large(fr->gblist.maxnrj);
1938
1939                         if (debug)
1940                         {
1941                             fprintf(debug,"Increasing GB neighbourlist j size to %d\n",fr->gblist.maxnrj);
1942                         }
1943
1944                         srenew(fr->gblist.jjnr,fr->gblist.maxnrj);
1945                     }
1946
1947                     /* Put in list */
1948                                         if(i == list->aj[k])
1949                                         {
1950                                                 gmx_incons("i == list->aj[k]");
1951                                         }
1952                     fr->gblist.jjnr[fr->gblist.nrj++] = list->aj[k];
1953                 }
1954
1955                                 fr->gblist.jindex[fr->gblist.nri] = fr->gblist.nrj;
1956             }
1957                 }
1958         }
1959
1960
1961 #ifdef SORT_GB_LIST
1962     for(i=0;i<fr->gblist.nri;i++)
1963     {
1964         nj0 = fr->gblist.jindex[i];
1965         nj1 = fr->gblist.jindex[i+1];
1966         ai  = fr->gblist.iinr[i];
1967
1968         /* Temporary fix */
1969                 for(j=nj0;j<nj1;j++)
1970                 {
1971             if(fr->gblist.jjnr[j]<ai)
1972                 fr->gblist.jjnr[j]+=fr->natoms_force;
1973         }
1974         qsort(fr->gblist.jjnr+nj0,nj1-nj0,sizeof(int),compare_int);
1975         /* Fix back */
1976         for(j=nj0;j<nj1;j++)
1977         {
1978             if(fr->gblist.jjnr[j]>=fr->natoms_force)
1979                 fr->gblist.jjnr[j]-=fr->natoms_force;
1980         }
1981
1982     }
1983 #endif
1984
1985     return 0;
1986 }
1987
1988 void make_local_gb(const t_commrec *cr, gmx_genborn_t *born, int gb_algorithm)
1989 {
1990     int i,at0,at1;
1991     gmx_domdec_t *dd=NULL;
1992
1993     if(DOMAINDECOMP(cr))
1994     {
1995         dd = cr->dd;
1996         at0 = 0;
1997         at1 = dd->nat_tot;
1998     }
1999     else
2000     {
2001         /* Single node or particle decomp (global==local), just copy pointers and return */
2002         if(gb_algorithm==egbSTILL)
2003         {
2004             born->gpol      = born->gpol_globalindex;
2005             born->vsolv     = born->vsolv_globalindex;
2006             born->gb_radius = born->gb_radius_globalindex;
2007         }
2008         else
2009         {
2010             born->param     = born->param_globalindex;
2011             born->gb_radius = born->gb_radius_globalindex;
2012         }
2013
2014         born->use = born->use_globalindex;
2015
2016         return;
2017     }
2018
2019     /* Reallocation of local arrays if necessary */
2020     if(born->nlocal < dd->nat_tot)
2021     {
2022         born->nlocal = dd->nat_tot;
2023
2024         /* Arrays specific to different gb algorithms */
2025         if(gb_algorithm==egbSTILL)
2026         {
2027             srenew(born->gpol,  born->nlocal+3);
2028             srenew(born->vsolv, born->nlocal+3);
2029             srenew(born->gb_radius, born->nlocal+3);
2030         }
2031         else
2032         {
2033             srenew(born->param, born->nlocal+3);
2034             srenew(born->gb_radius, born->nlocal+3);
2035         }
2036
2037         /* All gb-algorithms use the array for vsites exclusions */
2038         srenew(born->use,    born->nlocal+3);
2039     }
2040
2041     /* With dd, copy algorithm specific arrays */
2042     if(gb_algorithm==egbSTILL)
2043     {
2044         for(i=at0;i<at1;i++)
2045         {
2046             born->gpol[i]  = born->gpol_globalindex[dd->gatindex[i]];
2047             born->vsolv[i] = born->vsolv_globalindex[dd->gatindex[i]];
2048             born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]];
2049             born->use[i]   = born->use_globalindex[dd->gatindex[i]];
2050         }
2051     }
2052     else
2053     {
2054         for(i=at0;i<at1;i++)
2055         {
2056             born->param[i]     = born->param_globalindex[dd->gatindex[i]];
2057             born->gb_radius[i] = born->gb_radius_globalindex[dd->gatindex[i]];
2058             born->use[i]       = born->use_globalindex[dd->gatindex[i]];
2059         }
2060     }
2061 }
2062