Merge "only use CPU_COUNT if it's available" into release-4-6
[gromacs/AngularHB.git] / src / gmxlib / nonbonded / nb_kernel_f77_double / f77dkernel430.f
blob1b3bc1c7af2cef0619f5713641a1be172bb38925
2 C This source code is part of
4 C G R O M A C S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction. Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
32 C Gromacs nonbonded kernel f77dkernel430
33 C Coulomb interaction: Generalized-Born
34 C VdW interaction: Tabulated
35 C water optimization: No
36 C Calculate forces: yes
38 subroutine f77dkernel430(
39 & nri,
40 & iinr,
41 & jindex,
42 & jjnr,
43 & shift,
44 & shiftvec,
45 & fshift,
46 & gid,
47 & pos,
48 & faction,
49 & charge,
50 & facel,
51 & krf,
52 & crf,
53 & Vc,
54 & type,
55 & ntype,
56 & vdwparam,
57 & Vvdw,
58 & tabscale,
59 & VFtab,
60 & invsqrta,
61 & dvda,
62 & gbtabscale,
63 & GBtab,
64 & nthreads,
65 & count,
66 & mtx,
67 & outeriter,
68 & inneriter,
69 & work)
70 implicit none
71 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
72 real*8 shiftvec(*),fshift(*),pos(*),faction(*)
73 integer*4 gid(*),type(*),ntype
74 real*8 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
75 real*8 Vvdw(*),tabscale,VFtab(*)
76 real*8 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
77 integer*4 nthreads,count,mtx,outeriter,inneriter
78 real*8 work(*)
80 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
81 integer*4 nn0,nn1,nouter,ninner
82 real*8 shX,shY,shZ
83 real*8 fscal,tx,ty,tz
84 real*8 iq
85 real*8 qq,vcoul,vctot
86 integer*4 nti
87 integer*4 tj
88 real*8 Vvdw6,Vvdwtot
89 real*8 Vvdw12
90 real*8 r,rt,eps,eps2
91 integer*4 n0,nnn
92 real*8 Y,F,Geps,Heps2,Fp,VV
93 real*8 FF
94 real*8 fijC
95 real*8 fijD,fijR
96 real*8 isai,isaj,isaprod,gbscale,vgb
97 real*8 dvdasum,dvdatmp,dvdaj,fgb
98 real*8 ix1,iy1,iz1,fix1,fiy1,fiz1
99 real*8 jx1,jy1,jz1
100 real*8 dx11,dy11,dz11,rsq11,rinv11
101 real*8 c6,c12
104 C Reset outer and inner iteration counters
105 nouter = 0
106 ninner = 0
108 C Loop over thread workunits
109 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
110 if(nn1.gt.nri) nn1=nri
112 C Start outer loop over neighborlists
114 do n=nn0+1,nn1
116 C Load shift vector for this list
117 is3 = 3*shift(n)+1
118 shX = shiftvec(is3)
119 shY = shiftvec(is3+1)
120 shZ = shiftvec(is3+2)
122 C Load limits for loop over neighbors
123 nj0 = jindex(n)+1
124 nj1 = jindex(n+1)
126 C Get outer coordinate index
127 ii = iinr(n)+1
128 ii3 = 3*ii-2
130 C Load i atom data, add shift vector
131 ix1 = shX + pos(ii3+0)
132 iy1 = shY + pos(ii3+1)
133 iz1 = shZ + pos(ii3+2)
135 C Load parameters for i atom
136 iq = facel*charge(ii)
137 isai = invsqrta(ii)
138 nti = 2*ntype*type(ii)
140 C Zero the potential energy for this list
141 vctot = 0
142 Vvdwtot = 0
143 dvdasum = 0
145 C Clear i atom forces
146 fix1 = 0
147 fiy1 = 0
148 fiz1 = 0
150 do k=nj0,nj1
152 C Get j neighbor index, and coordinate index
153 jnr = jjnr(k)+1
154 j3 = 3*jnr-2
156 C load j atom coordinates
157 jx1 = pos(j3+0)
158 jy1 = pos(j3+1)
159 jz1 = pos(j3+2)
161 C Calculate distance
162 dx11 = ix1 - jx1
163 dy11 = iy1 - jy1
164 dz11 = iz1 - jz1
165 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
167 C Calculate 1/r and 1/r2
168 rinv11 = 1.0/sqrt(rsq11)
170 C Load parameters for j atom
171 isaj = invsqrta(jnr)
172 isaprod = isai*isaj
173 qq = iq*charge(jnr)
174 vcoul = qq*rinv11
175 fscal = vcoul*rinv11
176 qq = isaprod*(-qq)
177 gbscale = isaprod*gbtabscale
178 tj = nti+2*type(jnr)+1
179 c6 = vdwparam(tj)
180 c12 = vdwparam(tj+1)
182 C Tabulated Generalized-Born interaction
183 dvdaj = dvda(jnr)
184 r = rsq11*rinv11
186 C Calculate table index
187 rt = r*gbscale
188 n0 = rt
189 eps = rt-n0
190 eps2 = eps*eps
191 nnn = 4*n0+1
192 Y = GBtab(nnn)
193 F = GBtab(nnn+1)
194 Geps = eps*GBtab(nnn+2)
195 Heps2 = eps2*GBtab(nnn+3)
196 Fp = F+Geps+Heps2
197 VV = Y+eps*Fp
198 FF = Fp+Geps+2.0*Heps2
199 vgb = qq*VV
200 fijC = qq*FF*gbscale
201 dvdatmp = -0.5*(vgb+fijC*r)
202 dvdasum = dvdasum + dvdatmp
203 dvda(jnr) = dvdaj+dvdatmp*isaj*isaj
204 vctot = vctot + vcoul
206 C Calculate table index
207 r = rsq11*rinv11
209 C Calculate table index
210 rt = r*tabscale
211 n0 = rt
212 eps = rt-n0
213 eps2 = eps*eps
214 nnn = 8*n0+1
216 C Tabulated VdW interaction - dispersion
217 Y = VFtab(nnn)
218 F = VFtab(nnn+1)
219 Geps = eps*VFtab(nnn+2)
220 Heps2 = eps2*VFtab(nnn+3)
221 Fp = F+Geps+Heps2
222 VV = Y+eps*Fp
223 FF = Fp+Geps+2.0*Heps2
224 Vvdw6 = c6*VV
225 fijD = c6*FF
227 C Tabulated VdW interaction - repulsion
228 nnn = nnn+4
229 Y = VFtab(nnn)
230 F = VFtab(nnn+1)
231 Geps = eps*VFtab(nnn+2)
232 Heps2 = eps2*VFtab(nnn+3)
233 Fp = F+Geps+Heps2
234 VV = Y+eps*Fp
235 FF = Fp+Geps+2.0*Heps2
236 Vvdw12 = c12*VV
237 fijR = c12*FF
238 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12
239 fscal = -((fijD+fijR)*tabscale+fijC-fscal)
240 & *rinv11
242 C Calculate temporary vectorial force
243 tx = fscal*dx11
244 ty = fscal*dy11
245 tz = fscal*dz11
247 C Increment i atom force
248 fix1 = fix1 + tx
249 fiy1 = fiy1 + ty
250 fiz1 = fiz1 + tz
252 C Decrement j atom force
253 faction(j3+0) = faction(j3+0) - tx
254 faction(j3+1) = faction(j3+1) - ty
255 faction(j3+2) = faction(j3+2) - tz
257 C Inner loop uses 85 flops/iteration
258 end do
261 C Add i forces to mem and shifted force list
262 faction(ii3+0) = faction(ii3+0) + fix1
263 faction(ii3+1) = faction(ii3+1) + fiy1
264 faction(ii3+2) = faction(ii3+2) + fiz1
265 fshift(is3) = fshift(is3)+fix1
266 fshift(is3+1) = fshift(is3+1)+fiy1
267 fshift(is3+2) = fshift(is3+2)+fiz1
269 C Add potential energies to the group for this list
270 ggid = gid(n)+1
271 Vc(ggid) = Vc(ggid) + vctot
272 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
273 dvda(ii) = dvda(ii) + dvdasum*isai*isai
275 C Increment number of inner iterations
276 ninner = ninner + nj1 - nj0
278 C Outer loop uses 13 flops/iteration
279 end do
282 C Increment number of outer iterations
283 nouter = nouter + nn1 - nn0
284 if(nn1.lt.nri) goto 10
286 C Write outer/inner iteration count to pointers
287 outeriter = nouter
288 inneriter = ninner
289 return
298 C Gromacs nonbonded kernel f77dkernel430nf
299 C Coulomb interaction: Generalized-Born
300 C VdW interaction: Tabulated
301 C water optimization: No
302 C Calculate forces: no
304 subroutine f77dkernel430nf(
305 & nri,
306 & iinr,
307 & jindex,
308 & jjnr,
309 & shift,
310 & shiftvec,
311 & fshift,
312 & gid,
313 & pos,
314 & faction,
315 & charge,
316 & facel,
317 & krf,
318 & crf,
319 & Vc,
320 & type,
321 & ntype,
322 & vdwparam,
323 & Vvdw,
324 & tabscale,
325 & VFtab,
326 & invsqrta,
327 & dvda,
328 & gbtabscale,
329 & GBtab,
330 & nthreads,
331 & count,
332 & mtx,
333 & outeriter,
334 & inneriter,
335 & work)
336 implicit none
337 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
338 real*8 shiftvec(*),fshift(*),pos(*),faction(*)
339 integer*4 gid(*),type(*),ntype
340 real*8 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
341 real*8 Vvdw(*),tabscale,VFtab(*)
342 real*8 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
343 integer*4 nthreads,count,mtx,outeriter,inneriter
344 real*8 work(*)
346 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
347 integer*4 nn0,nn1,nouter,ninner
348 real*8 shX,shY,shZ
349 real*8 iq
350 real*8 qq,vcoul,vctot
351 integer*4 nti
352 integer*4 tj
353 real*8 Vvdw6,Vvdwtot
354 real*8 Vvdw12
355 real*8 r,rt,eps,eps2
356 integer*4 n0,nnn
357 real*8 Y,F,Geps,Heps2,Fp,VV
358 real*8 isai,isaj,isaprod,gbscale,vgb
359 real*8 ix1,iy1,iz1
360 real*8 jx1,jy1,jz1
361 real*8 dx11,dy11,dz11,rsq11,rinv11
362 real*8 c6,c12
365 C Reset outer and inner iteration counters
366 nouter = 0
367 ninner = 0
369 C Loop over thread workunits
370 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
371 if(nn1.gt.nri) nn1=nri
373 C Start outer loop over neighborlists
375 do n=nn0+1,nn1
377 C Load shift vector for this list
378 is3 = 3*shift(n)+1
379 shX = shiftvec(is3)
380 shY = shiftvec(is3+1)
381 shZ = shiftvec(is3+2)
383 C Load limits for loop over neighbors
384 nj0 = jindex(n)+1
385 nj1 = jindex(n+1)
387 C Get outer coordinate index
388 ii = iinr(n)+1
389 ii3 = 3*ii-2
391 C Load i atom data, add shift vector
392 ix1 = shX + pos(ii3+0)
393 iy1 = shY + pos(ii3+1)
394 iz1 = shZ + pos(ii3+2)
396 C Load parameters for i atom
397 iq = facel*charge(ii)
398 isai = invsqrta(ii)
399 nti = 2*ntype*type(ii)
401 C Zero the potential energy for this list
402 vctot = 0
403 Vvdwtot = 0
405 C Clear i atom forces
407 do k=nj0,nj1
409 C Get j neighbor index, and coordinate index
410 jnr = jjnr(k)+1
411 j3 = 3*jnr-2
413 C load j atom coordinates
414 jx1 = pos(j3+0)
415 jy1 = pos(j3+1)
416 jz1 = pos(j3+2)
418 C Calculate distance
419 dx11 = ix1 - jx1
420 dy11 = iy1 - jy1
421 dz11 = iz1 - jz1
422 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
424 C Calculate 1/r and 1/r2
425 rinv11 = 1.0/sqrt(rsq11)
427 C Load parameters for j atom
428 isaj = invsqrta(jnr)
429 isaprod = isai*isaj
430 qq = iq*charge(jnr)
431 vcoul = qq*rinv11
432 qq = isaprod*(-qq)
433 gbscale = isaprod*gbtabscale
434 tj = nti+2*type(jnr)+1
435 c6 = vdwparam(tj)
436 c12 = vdwparam(tj+1)
438 C Tabulated Generalized-Born interaction
439 r = rsq11*rinv11
441 C Calculate table index
442 rt = r*gbscale
443 n0 = rt
444 eps = rt-n0
445 eps2 = eps*eps
446 nnn = 4*n0+1
447 Y = GBtab(nnn)
448 F = GBtab(nnn+1)
449 Geps = eps*GBtab(nnn+2)
450 Heps2 = eps2*GBtab(nnn+3)
451 Fp = F+Geps+Heps2
452 VV = Y+eps*Fp
453 vgb = qq*VV
454 vctot = vctot + vcoul
456 C Calculate table index
457 r = rsq11*rinv11
459 C Calculate table index
460 rt = r*tabscale
461 n0 = rt
462 eps = rt-n0
463 eps2 = eps*eps
464 nnn = 8*n0+1
466 C Tabulated VdW interaction - dispersion
467 Y = VFtab(nnn)
468 F = VFtab(nnn+1)
469 Geps = eps*VFtab(nnn+2)
470 Heps2 = eps2*VFtab(nnn+3)
471 Fp = F+Geps+Heps2
472 VV = Y+eps*Fp
473 Vvdw6 = c6*VV
475 C Tabulated VdW interaction - repulsion
476 nnn = nnn+4
477 Y = VFtab(nnn)
478 F = VFtab(nnn+1)
479 Geps = eps*VFtab(nnn+2)
480 Heps2 = eps2*VFtab(nnn+3)
481 Fp = F+Geps+Heps2
482 VV = Y+eps*Fp
483 Vvdw12 = c12*VV
484 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12
486 C Inner loop uses 54 flops/iteration
487 end do
490 C Add i forces to mem and shifted force list
492 C Add potential energies to the group for this list
493 ggid = gid(n)+1
494 Vc(ggid) = Vc(ggid) + vctot
495 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
497 C Increment number of inner iterations
498 ninner = ninner + nj1 - nj0
500 C Outer loop uses 6 flops/iteration
501 end do
504 C Increment number of outer iterations
505 nouter = nouter + nn1 - nn0
506 if(nn1.lt.nri) goto 10
508 C Write outer/inner iteration count to pointers
509 outeriter = nouter
510 inneriter = ninner
511 return