Merge "only use CPU_COUNT if it's available" into release-4-6
[gromacs/AngularHB.git] / src / gmxlib / nonbonded / nb_kernel_f77_double / f77dkernel220.f
blob019788775d4e6a6799dc42e7e1e6bd0cb301eb0f
2 C This source code is part of
4 C G R O M A C S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction. Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
32 C Gromacs nonbonded kernel f77dkernel220
33 C Coulomb interaction: Reaction field
34 C VdW interaction: Buckingham
35 C water optimization: No
36 C Calculate forces: yes
38 subroutine f77dkernel220(
39 & nri,
40 & iinr,
41 & jindex,
42 & jjnr,
43 & shift,
44 & shiftvec,
45 & fshift,
46 & gid,
47 & pos,
48 & faction,
49 & charge,
50 & facel,
51 & krf,
52 & crf,
53 & Vc,
54 & type,
55 & ntype,
56 & vdwparam,
57 & Vvdw,
58 & tabscale,
59 & VFtab,
60 & invsqrta,
61 & dvda,
62 & gbtabscale,
63 & GBtab,
64 & nthreads,
65 & count,
66 & mtx,
67 & outeriter,
68 & inneriter,
69 & work)
70 implicit none
71 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
72 real*8 shiftvec(*),fshift(*),pos(*),faction(*)
73 integer*4 gid(*),type(*),ntype
74 real*8 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
75 real*8 Vvdw(*),tabscale,VFtab(*)
76 real*8 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
77 integer*4 nthreads,count,mtx,outeriter,inneriter
78 real*8 work(*)
80 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
81 integer*4 nn0,nn1,nouter,ninner
82 real*8 shX,shY,shZ
83 real*8 fscal,tx,ty,tz
84 real*8 rinvsq
85 real*8 iq
86 real*8 qq,vcoul,vctot
87 integer*4 nti
88 integer*4 tj
89 real*8 rinvsix
90 real*8 Vvdw6,Vvdwtot
91 real*8 krsq
92 real*8 Vvdwexp,br
93 real*8 ix1,iy1,iz1,fix1,fiy1,fiz1
94 real*8 jx1,jy1,jz1
95 real*8 dx11,dy11,dz11,rsq11,rinv11
96 real*8 c6,cexp1,cexp2
99 C Reset outer and inner iteration counters
100 nouter = 0
101 ninner = 0
103 C Loop over thread workunits
104 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
105 if(nn1.gt.nri) nn1=nri
107 C Start outer loop over neighborlists
109 do n=nn0+1,nn1
111 C Load shift vector for this list
112 is3 = 3*shift(n)+1
113 shX = shiftvec(is3)
114 shY = shiftvec(is3+1)
115 shZ = shiftvec(is3+2)
117 C Load limits for loop over neighbors
118 nj0 = jindex(n)+1
119 nj1 = jindex(n+1)
121 C Get outer coordinate index
122 ii = iinr(n)+1
123 ii3 = 3*ii-2
125 C Load i atom data, add shift vector
126 ix1 = shX + pos(ii3+0)
127 iy1 = shY + pos(ii3+1)
128 iz1 = shZ + pos(ii3+2)
130 C Load parameters for i atom
131 iq = facel*charge(ii)
132 nti = 3*ntype*type(ii)
134 C Zero the potential energy for this list
135 vctot = 0
136 Vvdwtot = 0
138 C Clear i atom forces
139 fix1 = 0
140 fiy1 = 0
141 fiz1 = 0
143 do k=nj0,nj1
145 C Get j neighbor index, and coordinate index
146 jnr = jjnr(k)+1
147 j3 = 3*jnr-2
149 C load j atom coordinates
150 jx1 = pos(j3+0)
151 jy1 = pos(j3+1)
152 jz1 = pos(j3+2)
154 C Calculate distance
155 dx11 = ix1 - jx1
156 dy11 = iy1 - jy1
157 dz11 = iz1 - jz1
158 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
160 C Calculate 1/r and 1/r2
161 rinv11 = 1.0/sqrt(rsq11)
163 C Load parameters for j atom
164 qq = iq*charge(jnr)
165 tj = nti+3*type(jnr)+1
166 c6 = vdwparam(tj)
167 cexp1 = vdwparam(tj+1)
168 cexp2 = vdwparam(tj+2)
169 rinvsq = rinv11*rinv11
171 C Coulomb reaction-field interaction
172 krsq = krf*rsq11
173 vcoul = qq*(rinv11+krsq-crf)
174 vctot = vctot+vcoul
176 C Buckingham interaction
177 rinvsix = rinvsq*rinvsq*rinvsq
178 Vvdw6 = c6*rinvsix
179 br = cexp2*rsq11*rinv11
180 Vvdwexp = cexp1*exp(-br)
181 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6
182 fscal = (qq*(rinv11-2.0*krsq)+br*Vvdwexp
183 & -6.0*Vvdw6)*rinvsq
185 C Calculate temporary vectorial force
186 tx = fscal*dx11
187 ty = fscal*dy11
188 tz = fscal*dz11
190 C Increment i atom force
191 fix1 = fix1 + tx
192 fiy1 = fiy1 + ty
193 fiz1 = fiz1 + tz
195 C Decrement j atom force
196 faction(j3+0) = faction(j3+0) - tx
197 faction(j3+1) = faction(j3+1) - ty
198 faction(j3+2) = faction(j3+2) - tz
200 C Inner loop uses 75 flops/iteration
201 end do
204 C Add i forces to mem and shifted force list
205 faction(ii3+0) = faction(ii3+0) + fix1
206 faction(ii3+1) = faction(ii3+1) + fiy1
207 faction(ii3+2) = faction(ii3+2) + fiz1
208 fshift(is3) = fshift(is3)+fix1
209 fshift(is3+1) = fshift(is3+1)+fiy1
210 fshift(is3+2) = fshift(is3+2)+fiz1
212 C Add potential energies to the group for this list
213 ggid = gid(n)+1
214 Vc(ggid) = Vc(ggid) + vctot
215 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
217 C Increment number of inner iterations
218 ninner = ninner + nj1 - nj0
220 C Outer loop uses 12 flops/iteration
221 end do
224 C Increment number of outer iterations
225 nouter = nouter + nn1 - nn0
226 if(nn1.lt.nri) goto 10
228 C Write outer/inner iteration count to pointers
229 outeriter = nouter
230 inneriter = ninner
231 return
240 C Gromacs nonbonded kernel f77dkernel220nf
241 C Coulomb interaction: Reaction field
242 C VdW interaction: Buckingham
243 C water optimization: No
244 C Calculate forces: no
246 subroutine f77dkernel220nf(
247 & nri,
248 & iinr,
249 & jindex,
250 & jjnr,
251 & shift,
252 & shiftvec,
253 & fshift,
254 & gid,
255 & pos,
256 & faction,
257 & charge,
258 & facel,
259 & krf,
260 & crf,
261 & Vc,
262 & type,
263 & ntype,
264 & vdwparam,
265 & Vvdw,
266 & tabscale,
267 & VFtab,
268 & invsqrta,
269 & dvda,
270 & gbtabscale,
271 & GBtab,
272 & nthreads,
273 & count,
274 & mtx,
275 & outeriter,
276 & inneriter,
277 & work)
278 implicit none
279 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
280 real*8 shiftvec(*),fshift(*),pos(*),faction(*)
281 integer*4 gid(*),type(*),ntype
282 real*8 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
283 real*8 Vvdw(*),tabscale,VFtab(*)
284 real*8 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
285 integer*4 nthreads,count,mtx,outeriter,inneriter
286 real*8 work(*)
288 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
289 integer*4 nn0,nn1,nouter,ninner
290 real*8 shX,shY,shZ
291 real*8 rinvsq
292 real*8 iq
293 real*8 qq,vcoul,vctot
294 integer*4 nti
295 integer*4 tj
296 real*8 rinvsix
297 real*8 Vvdw6,Vvdwtot
298 real*8 krsq
299 real*8 Vvdwexp,br
300 real*8 ix1,iy1,iz1
301 real*8 jx1,jy1,jz1
302 real*8 dx11,dy11,dz11,rsq11,rinv11
303 real*8 c6,cexp1,cexp2
306 C Reset outer and inner iteration counters
307 nouter = 0
308 ninner = 0
310 C Loop over thread workunits
311 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
312 if(nn1.gt.nri) nn1=nri
314 C Start outer loop over neighborlists
316 do n=nn0+1,nn1
318 C Load shift vector for this list
319 is3 = 3*shift(n)+1
320 shX = shiftvec(is3)
321 shY = shiftvec(is3+1)
322 shZ = shiftvec(is3+2)
324 C Load limits for loop over neighbors
325 nj0 = jindex(n)+1
326 nj1 = jindex(n+1)
328 C Get outer coordinate index
329 ii = iinr(n)+1
330 ii3 = 3*ii-2
332 C Load i atom data, add shift vector
333 ix1 = shX + pos(ii3+0)
334 iy1 = shY + pos(ii3+1)
335 iz1 = shZ + pos(ii3+2)
337 C Load parameters for i atom
338 iq = facel*charge(ii)
339 nti = 3*ntype*type(ii)
341 C Zero the potential energy for this list
342 vctot = 0
343 Vvdwtot = 0
345 C Clear i atom forces
347 do k=nj0,nj1
349 C Get j neighbor index, and coordinate index
350 jnr = jjnr(k)+1
351 j3 = 3*jnr-2
353 C load j atom coordinates
354 jx1 = pos(j3+0)
355 jy1 = pos(j3+1)
356 jz1 = pos(j3+2)
358 C Calculate distance
359 dx11 = ix1 - jx1
360 dy11 = iy1 - jy1
361 dz11 = iz1 - jz1
362 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
364 C Calculate 1/r and 1/r2
365 rinv11 = 1.0/sqrt(rsq11)
367 C Load parameters for j atom
368 qq = iq*charge(jnr)
369 tj = nti+3*type(jnr)+1
370 c6 = vdwparam(tj)
371 cexp1 = vdwparam(tj+1)
372 cexp2 = vdwparam(tj+2)
373 rinvsq = rinv11*rinv11
375 C Coulomb reaction-field interaction
376 krsq = krf*rsq11
377 vcoul = qq*(rinv11+krsq-crf)
378 vctot = vctot+vcoul
380 C Buckingham interaction
381 rinvsix = rinvsq*rinvsq*rinvsq
382 Vvdw6 = c6*rinvsix
383 br = cexp2*rsq11*rinv11
384 Vvdwexp = cexp1*exp(-br)
385 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6
387 C Inner loop uses 59 flops/iteration
388 end do
391 C Add i forces to mem and shifted force list
393 C Add potential energies to the group for this list
394 ggid = gid(n)+1
395 Vc(ggid) = Vc(ggid) + vctot
396 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
398 C Increment number of inner iterations
399 ninner = ninner + nj1 - nj0
401 C Outer loop uses 6 flops/iteration
402 end do
405 C Increment number of outer iterations
406 nouter = nouter + nn1 - nn0
407 if(nn1.lt.nri) goto 10
409 C Write outer/inner iteration count to pointers
410 outeriter = nouter
411 inneriter = ninner
412 return