Merge "only use CPU_COUNT if it's available" into release-4-6
[gromacs/AngularHB.git] / src / gmxlib / nonbonded / nb_kernel_f77_double / f77dkernel310.f
blob0134a1300e250951afee1721ae3d62feaa69f83e
2 C This source code is part of
4 C G R O M A C S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction. Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
32 C Gromacs nonbonded kernel f77dkernel310
33 C Coulomb interaction: Tabulated
34 C VdW interaction: Lennard-Jones
35 C water optimization: No
36 C Calculate forces: yes
38 subroutine f77dkernel310(
39 & nri,
40 & iinr,
41 & jindex,
42 & jjnr,
43 & shift,
44 & shiftvec,
45 & fshift,
46 & gid,
47 & pos,
48 & faction,
49 & charge,
50 & facel,
51 & krf,
52 & crf,
53 & Vc,
54 & type,
55 & ntype,
56 & vdwparam,
57 & Vvdw,
58 & tabscale,
59 & VFtab,
60 & invsqrta,
61 & dvda,
62 & gbtabscale,
63 & GBtab,
64 & nthreads,
65 & count,
66 & mtx,
67 & outeriter,
68 & inneriter,
69 & work)
70 implicit none
71 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
72 real*8 shiftvec(*),fshift(*),pos(*),faction(*)
73 integer*4 gid(*),type(*),ntype
74 real*8 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
75 real*8 Vvdw(*),tabscale,VFtab(*)
76 real*8 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
77 integer*4 nthreads,count,mtx,outeriter,inneriter
78 real*8 work(*)
80 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
81 integer*4 nn0,nn1,nouter,ninner
82 real*8 shX,shY,shZ
83 real*8 fscal,tx,ty,tz
84 real*8 rinvsq
85 real*8 iq
86 real*8 qq,vcoul,vctot
87 integer*4 nti
88 integer*4 tj
89 real*8 rinvsix
90 real*8 Vvdw6,Vvdwtot
91 real*8 Vvdw12
92 real*8 r,rt,eps,eps2
93 integer*4 n0,nnn
94 real*8 Y,F,Geps,Heps2,Fp,VV
95 real*8 FF
96 real*8 fijC
97 real*8 ix1,iy1,iz1,fix1,fiy1,fiz1
98 real*8 jx1,jy1,jz1
99 real*8 dx11,dy11,dz11,rsq11,rinv11
100 real*8 c6,c12
103 C Reset outer and inner iteration counters
104 nouter = 0
105 ninner = 0
107 C Loop over thread workunits
108 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
109 if(nn1.gt.nri) nn1=nri
111 C Start outer loop over neighborlists
113 do n=nn0+1,nn1
115 C Load shift vector for this list
116 is3 = 3*shift(n)+1
117 shX = shiftvec(is3)
118 shY = shiftvec(is3+1)
119 shZ = shiftvec(is3+2)
121 C Load limits for loop over neighbors
122 nj0 = jindex(n)+1
123 nj1 = jindex(n+1)
125 C Get outer coordinate index
126 ii = iinr(n)+1
127 ii3 = 3*ii-2
129 C Load i atom data, add shift vector
130 ix1 = shX + pos(ii3+0)
131 iy1 = shY + pos(ii3+1)
132 iz1 = shZ + pos(ii3+2)
134 C Load parameters for i atom
135 iq = facel*charge(ii)
136 nti = 2*ntype*type(ii)
138 C Zero the potential energy for this list
139 vctot = 0
140 Vvdwtot = 0
142 C Clear i atom forces
143 fix1 = 0
144 fiy1 = 0
145 fiz1 = 0
147 do k=nj0,nj1
149 C Get j neighbor index, and coordinate index
150 jnr = jjnr(k)+1
151 j3 = 3*jnr-2
153 C load j atom coordinates
154 jx1 = pos(j3+0)
155 jy1 = pos(j3+1)
156 jz1 = pos(j3+2)
158 C Calculate distance
159 dx11 = ix1 - jx1
160 dy11 = iy1 - jy1
161 dz11 = iz1 - jz1
162 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
164 C Calculate 1/r and 1/r2
165 rinv11 = 1.0/sqrt(rsq11)
167 C Load parameters for j atom
168 qq = iq*charge(jnr)
169 tj = nti+2*type(jnr)+1
170 c6 = vdwparam(tj)
171 c12 = vdwparam(tj+1)
172 rinvsq = rinv11*rinv11
174 C Calculate table index
175 r = rsq11*rinv11
177 C Calculate table index
178 rt = r*tabscale
179 n0 = rt
180 eps = rt-n0
181 eps2 = eps*eps
182 nnn = 4*n0+1
184 C Tabulated coulomb interaction
185 Y = VFtab(nnn)
186 F = VFtab(nnn+1)
187 Geps = eps*VFtab(nnn+2)
188 Heps2 = eps2*VFtab(nnn+3)
189 Fp = F+Geps+Heps2
190 VV = Y+eps*Fp
191 FF = Fp+Geps+2.0*Heps2
192 vcoul = qq*VV
193 fijC = qq*FF
194 vctot = vctot + vcoul
196 C Lennard-Jones interaction
197 rinvsix = rinvsq*rinvsq*rinvsq
198 Vvdw6 = c6*rinvsix
199 Vvdw12 = c12*rinvsix*rinvsix
200 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6
201 fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq
202 & -((fijC)*tabscale)*rinv11
204 C Calculate temporary vectorial force
205 tx = fscal*dx11
206 ty = fscal*dy11
207 tz = fscal*dz11
209 C Increment i atom force
210 fix1 = fix1 + tx
211 fiy1 = fiy1 + ty
212 fiz1 = fiz1 + tz
214 C Decrement j atom force
215 faction(j3+0) = faction(j3+0) - tx
216 faction(j3+1) = faction(j3+1) - ty
217 faction(j3+2) = faction(j3+2) - tz
219 C Inner loop uses 60 flops/iteration
220 end do
223 C Add i forces to mem and shifted force list
224 faction(ii3+0) = faction(ii3+0) + fix1
225 faction(ii3+1) = faction(ii3+1) + fiy1
226 faction(ii3+2) = faction(ii3+2) + fiz1
227 fshift(is3) = fshift(is3)+fix1
228 fshift(is3+1) = fshift(is3+1)+fiy1
229 fshift(is3+2) = fshift(is3+2)+fiz1
231 C Add potential energies to the group for this list
232 ggid = gid(n)+1
233 Vc(ggid) = Vc(ggid) + vctot
234 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
236 C Increment number of inner iterations
237 ninner = ninner + nj1 - nj0
239 C Outer loop uses 12 flops/iteration
240 end do
243 C Increment number of outer iterations
244 nouter = nouter + nn1 - nn0
245 if(nn1.lt.nri) goto 10
247 C Write outer/inner iteration count to pointers
248 outeriter = nouter
249 inneriter = ninner
250 return
259 C Gromacs nonbonded kernel f77dkernel310nf
260 C Coulomb interaction: Tabulated
261 C VdW interaction: Lennard-Jones
262 C water optimization: No
263 C Calculate forces: no
265 subroutine f77dkernel310nf(
266 & nri,
267 & iinr,
268 & jindex,
269 & jjnr,
270 & shift,
271 & shiftvec,
272 & fshift,
273 & gid,
274 & pos,
275 & faction,
276 & charge,
277 & facel,
278 & krf,
279 & crf,
280 & Vc,
281 & type,
282 & ntype,
283 & vdwparam,
284 & Vvdw,
285 & tabscale,
286 & VFtab,
287 & invsqrta,
288 & dvda,
289 & gbtabscale,
290 & GBtab,
291 & nthreads,
292 & count,
293 & mtx,
294 & outeriter,
295 & inneriter,
296 & work)
297 implicit none
298 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
299 real*8 shiftvec(*),fshift(*),pos(*),faction(*)
300 integer*4 gid(*),type(*),ntype
301 real*8 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
302 real*8 Vvdw(*),tabscale,VFtab(*)
303 real*8 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
304 integer*4 nthreads,count,mtx,outeriter,inneriter
305 real*8 work(*)
307 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
308 integer*4 nn0,nn1,nouter,ninner
309 real*8 shX,shY,shZ
310 real*8 rinvsq
311 real*8 iq
312 real*8 qq,vcoul,vctot
313 integer*4 nti
314 integer*4 tj
315 real*8 rinvsix
316 real*8 Vvdw6,Vvdwtot
317 real*8 Vvdw12
318 real*8 r,rt,eps,eps2
319 integer*4 n0,nnn
320 real*8 Y,F,Geps,Heps2,Fp,VV
321 real*8 ix1,iy1,iz1
322 real*8 jx1,jy1,jz1
323 real*8 dx11,dy11,dz11,rsq11,rinv11
324 real*8 c6,c12
327 C Reset outer and inner iteration counters
328 nouter = 0
329 ninner = 0
331 C Loop over thread workunits
332 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
333 if(nn1.gt.nri) nn1=nri
335 C Start outer loop over neighborlists
337 do n=nn0+1,nn1
339 C Load shift vector for this list
340 is3 = 3*shift(n)+1
341 shX = shiftvec(is3)
342 shY = shiftvec(is3+1)
343 shZ = shiftvec(is3+2)
345 C Load limits for loop over neighbors
346 nj0 = jindex(n)+1
347 nj1 = jindex(n+1)
349 C Get outer coordinate index
350 ii = iinr(n)+1
351 ii3 = 3*ii-2
353 C Load i atom data, add shift vector
354 ix1 = shX + pos(ii3+0)
355 iy1 = shY + pos(ii3+1)
356 iz1 = shZ + pos(ii3+2)
358 C Load parameters for i atom
359 iq = facel*charge(ii)
360 nti = 2*ntype*type(ii)
362 C Zero the potential energy for this list
363 vctot = 0
364 Vvdwtot = 0
366 C Clear i atom forces
368 do k=nj0,nj1
370 C Get j neighbor index, and coordinate index
371 jnr = jjnr(k)+1
372 j3 = 3*jnr-2
374 C load j atom coordinates
375 jx1 = pos(j3+0)
376 jy1 = pos(j3+1)
377 jz1 = pos(j3+2)
379 C Calculate distance
380 dx11 = ix1 - jx1
381 dy11 = iy1 - jy1
382 dz11 = iz1 - jz1
383 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
385 C Calculate 1/r and 1/r2
386 rinv11 = 1.0/sqrt(rsq11)
388 C Load parameters for j atom
389 qq = iq*charge(jnr)
390 tj = nti+2*type(jnr)+1
391 c6 = vdwparam(tj)
392 c12 = vdwparam(tj+1)
393 rinvsq = rinv11*rinv11
395 C Calculate table index
396 r = rsq11*rinv11
398 C Calculate table index
399 rt = r*tabscale
400 n0 = rt
401 eps = rt-n0
402 eps2 = eps*eps
403 nnn = 4*n0+1
405 C Tabulated coulomb interaction
406 Y = VFtab(nnn)
407 F = VFtab(nnn+1)
408 Geps = eps*VFtab(nnn+2)
409 Heps2 = eps2*VFtab(nnn+3)
410 Fp = F+Geps+Heps2
411 VV = Y+eps*Fp
412 vcoul = qq*VV
413 vctot = vctot + vcoul
415 C Lennard-Jones interaction
416 rinvsix = rinvsq*rinvsq*rinvsq
417 Vvdw6 = c6*rinvsix
418 Vvdw12 = c12*rinvsix*rinvsix
419 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6
421 C Inner loop uses 39 flops/iteration
422 end do
425 C Add i forces to mem and shifted force list
427 C Add potential energies to the group for this list
428 ggid = gid(n)+1
429 Vc(ggid) = Vc(ggid) + vctot
430 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
432 C Increment number of inner iterations
433 ninner = ninner + nj1 - nj0
435 C Outer loop uses 6 flops/iteration
436 end do
439 C Increment number of outer iterations
440 nouter = nouter + nn1 - nn0
441 if(nn1.lt.nri) goto 10
443 C Write outer/inner iteration count to pointers
444 outeriter = nouter
445 inneriter = ninner
446 return