Merge "only use CPU_COUNT if it's available" into release-4-6
[gromacs/AngularHB.git] / src / gmxlib / nonbonded / nb_kernel_f77_double / f77dkernel131.f
blob43115420907fafad6b00a07986a2b140a90a166d
2 C This source code is part of
4 C G R O M A C S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction. Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
32 C Gromacs nonbonded kernel f77dkernel131
33 C Coulomb interaction: Normal Coulomb
34 C VdW interaction: Tabulated
35 C water optimization: SPC/TIP3P - other atoms
36 C Calculate forces: yes
38 subroutine f77dkernel131(
39 & nri,
40 & iinr,
41 & jindex,
42 & jjnr,
43 & shift,
44 & shiftvec,
45 & fshift,
46 & gid,
47 & pos,
48 & faction,
49 & charge,
50 & facel,
51 & krf,
52 & crf,
53 & Vc,
54 & type,
55 & ntype,
56 & vdwparam,
57 & Vvdw,
58 & tabscale,
59 & VFtab,
60 & invsqrta,
61 & dvda,
62 & gbtabscale,
63 & GBtab,
64 & nthreads,
65 & count,
66 & mtx,
67 & outeriter,
68 & inneriter,
69 & work)
70 implicit none
71 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
72 real*8 shiftvec(*),fshift(*),pos(*),faction(*)
73 integer*4 gid(*),type(*),ntype
74 real*8 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
75 real*8 Vvdw(*),tabscale,VFtab(*)
76 real*8 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
77 integer*4 nthreads,count,mtx,outeriter,inneriter
78 real*8 work(*)
80 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
81 integer*4 nn0,nn1,nouter,ninner
82 real*8 shX,shY,shZ
83 real*8 fscal,tx,ty,tz
84 real*8 rinvsq
85 real*8 jq
86 real*8 qq,vcoul,vctot
87 integer*4 nti
88 integer*4 tj
89 real*8 Vvdw6,Vvdwtot
90 real*8 Vvdw12
91 real*8 r,rt,eps,eps2
92 integer*4 n0,nnn
93 real*8 Y,F,Geps,Heps2,Fp,VV
94 real*8 FF
95 real*8 fijD,fijR
96 real*8 ix1,iy1,iz1,fix1,fiy1,fiz1
97 real*8 ix2,iy2,iz2,fix2,fiy2,fiz2
98 real*8 ix3,iy3,iz3,fix3,fiy3,fiz3
99 real*8 jx1,jy1,jz1,fjx1,fjy1,fjz1
100 real*8 dx11,dy11,dz11,rsq11,rinv11
101 real*8 dx21,dy21,dz21,rsq21,rinv21
102 real*8 dx31,dy31,dz31,rsq31,rinv31
103 real*8 qO,qH
104 real*8 c6,c12
107 C Initialize water data
108 ii = iinr(1)+1
109 qO = facel*charge(ii)
110 qH = facel*charge(ii+1)
111 nti = 2*ntype*type(ii)
114 C Reset outer and inner iteration counters
115 nouter = 0
116 ninner = 0
118 C Loop over thread workunits
119 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
120 if(nn1.gt.nri) nn1=nri
122 C Start outer loop over neighborlists
124 do n=nn0+1,nn1
126 C Load shift vector for this list
127 is3 = 3*shift(n)+1
128 shX = shiftvec(is3)
129 shY = shiftvec(is3+1)
130 shZ = shiftvec(is3+2)
132 C Load limits for loop over neighbors
133 nj0 = jindex(n)+1
134 nj1 = jindex(n+1)
136 C Get outer coordinate index
137 ii = iinr(n)+1
138 ii3 = 3*ii-2
140 C Load i atom data, add shift vector
141 ix1 = shX + pos(ii3+0)
142 iy1 = shY + pos(ii3+1)
143 iz1 = shZ + pos(ii3+2)
144 ix2 = shX + pos(ii3+3)
145 iy2 = shY + pos(ii3+4)
146 iz2 = shZ + pos(ii3+5)
147 ix3 = shX + pos(ii3+6)
148 iy3 = shY + pos(ii3+7)
149 iz3 = shZ + pos(ii3+8)
151 C Zero the potential energy for this list
152 vctot = 0
153 Vvdwtot = 0
155 C Clear i atom forces
156 fix1 = 0
157 fiy1 = 0
158 fiz1 = 0
159 fix2 = 0
160 fiy2 = 0
161 fiz2 = 0
162 fix3 = 0
163 fiy3 = 0
164 fiz3 = 0
166 do k=nj0,nj1
168 C Get j neighbor index, and coordinate index
169 jnr = jjnr(k)+1
170 j3 = 3*jnr-2
172 C load j atom coordinates
173 jx1 = pos(j3+0)
174 jy1 = pos(j3+1)
175 jz1 = pos(j3+2)
177 C Calculate distance
178 dx11 = ix1 - jx1
179 dy11 = iy1 - jy1
180 dz11 = iz1 - jz1
181 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
182 dx21 = ix2 - jx1
183 dy21 = iy2 - jy1
184 dz21 = iz2 - jz1
185 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
186 dx31 = ix3 - jx1
187 dy31 = iy3 - jy1
188 dz31 = iz3 - jz1
189 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
191 C Calculate 1/r and 1/r2
192 rinv11 = 1.0/sqrt(rsq11)
193 rinv21 = 1.0/sqrt(rsq21)
194 rinv31 = 1.0/sqrt(rsq31)
196 C Load parameters for j atom
197 jq = charge(jnr+0)
198 qq = qO*jq
199 tj = nti+2*type(jnr)+1
200 c6 = vdwparam(tj)
201 c12 = vdwparam(tj+1)
202 rinvsq = rinv11*rinv11
204 C Coulomb interaction
205 vcoul = qq*rinv11
206 vctot = vctot+vcoul
208 C Calculate table index
209 r = rsq11*rinv11
211 C Calculate table index
212 rt = r*tabscale
213 n0 = rt
214 eps = rt-n0
215 eps2 = eps*eps
216 nnn = 8*n0+1
218 C Tabulated VdW interaction - dispersion
219 Y = VFtab(nnn)
220 F = VFtab(nnn+1)
221 Geps = eps*VFtab(nnn+2)
222 Heps2 = eps2*VFtab(nnn+3)
223 Fp = F+Geps+Heps2
224 VV = Y+eps*Fp
225 FF = Fp+Geps+2.0*Heps2
226 Vvdw6 = c6*VV
227 fijD = c6*FF
229 C Tabulated VdW interaction - repulsion
230 nnn = nnn+4
231 Y = VFtab(nnn)
232 F = VFtab(nnn+1)
233 Geps = eps*VFtab(nnn+2)
234 Heps2 = eps2*VFtab(nnn+3)
235 Fp = F+Geps+Heps2
236 VV = Y+eps*Fp
237 FF = Fp+Geps+2.0*Heps2
238 Vvdw12 = c12*VV
239 fijR = c12*FF
240 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12
241 fscal = (vcoul)*rinvsq-((fijD+fijR)
242 & *tabscale)*rinv11
244 C Calculate temporary vectorial force
245 tx = fscal*dx11
246 ty = fscal*dy11
247 tz = fscal*dz11
249 C Increment i atom force
250 fix1 = fix1 + tx
251 fiy1 = fiy1 + ty
252 fiz1 = fiz1 + tz
254 C Decrement j atom force
255 fjx1 = faction(j3+0) - tx
256 fjy1 = faction(j3+1) - ty
257 fjz1 = faction(j3+2) - tz
259 C Load parameters for j atom
260 qq = qH*jq
261 rinvsq = rinv21*rinv21
263 C Coulomb interaction
264 vcoul = qq*rinv21
265 vctot = vctot+vcoul
266 fscal = (vcoul)*rinvsq
268 C Calculate temporary vectorial force
269 tx = fscal*dx21
270 ty = fscal*dy21
271 tz = fscal*dz21
273 C Increment i atom force
274 fix2 = fix2 + tx
275 fiy2 = fiy2 + ty
276 fiz2 = fiz2 + tz
278 C Decrement j atom force
279 fjx1 = fjx1 - tx
280 fjy1 = fjy1 - ty
281 fjz1 = fjz1 - tz
283 C Load parameters for j atom
284 rinvsq = rinv31*rinv31
286 C Coulomb interaction
287 vcoul = qq*rinv31
288 vctot = vctot+vcoul
289 fscal = (vcoul)*rinvsq
291 C Calculate temporary vectorial force
292 tx = fscal*dx31
293 ty = fscal*dy31
294 tz = fscal*dz31
296 C Increment i atom force
297 fix3 = fix3 + tx
298 fiy3 = fiy3 + ty
299 fiz3 = fiz3 + tz
301 C Decrement j atom force
302 faction(j3+0) = fjx1 - tx
303 faction(j3+1) = fjy1 - ty
304 faction(j3+2) = fjz1 - tz
306 C Inner loop uses 127 flops/iteration
307 end do
310 C Add i forces to mem and shifted force list
311 faction(ii3+0) = faction(ii3+0) + fix1
312 faction(ii3+1) = faction(ii3+1) + fiy1
313 faction(ii3+2) = faction(ii3+2) + fiz1
314 faction(ii3+3) = faction(ii3+3) + fix2
315 faction(ii3+4) = faction(ii3+4) + fiy2
316 faction(ii3+5) = faction(ii3+5) + fiz2
317 faction(ii3+6) = faction(ii3+6) + fix3
318 faction(ii3+7) = faction(ii3+7) + fiy3
319 faction(ii3+8) = faction(ii3+8) + fiz3
320 fshift(is3) = fshift(is3)+fix1+fix2+fix3
321 fshift(is3+1) = fshift(is3+1)+fiy1+fiy2+fiy3
322 fshift(is3+2) = fshift(is3+2)+fiz1+fiz2+fiz3
324 C Add potential energies to the group for this list
325 ggid = gid(n)+1
326 Vc(ggid) = Vc(ggid) + vctot
327 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
329 C Increment number of inner iterations
330 ninner = ninner + nj1 - nj0
332 C Outer loop uses 29 flops/iteration
333 end do
336 C Increment number of outer iterations
337 nouter = nouter + nn1 - nn0
338 if(nn1.lt.nri) goto 10
340 C Write outer/inner iteration count to pointers
341 outeriter = nouter
342 inneriter = ninner
343 return
352 C Gromacs nonbonded kernel f77dkernel131nf
353 C Coulomb interaction: Normal Coulomb
354 C VdW interaction: Tabulated
355 C water optimization: SPC/TIP3P - other atoms
356 C Calculate forces: no
358 subroutine f77dkernel131nf(
359 & nri,
360 & iinr,
361 & jindex,
362 & jjnr,
363 & shift,
364 & shiftvec,
365 & fshift,
366 & gid,
367 & pos,
368 & faction,
369 & charge,
370 & facel,
371 & krf,
372 & crf,
373 & Vc,
374 & type,
375 & ntype,
376 & vdwparam,
377 & Vvdw,
378 & tabscale,
379 & VFtab,
380 & invsqrta,
381 & dvda,
382 & gbtabscale,
383 & GBtab,
384 & nthreads,
385 & count,
386 & mtx,
387 & outeriter,
388 & inneriter,
389 & work)
390 implicit none
391 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
392 real*8 shiftvec(*),fshift(*),pos(*),faction(*)
393 integer*4 gid(*),type(*),ntype
394 real*8 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
395 real*8 Vvdw(*),tabscale,VFtab(*)
396 real*8 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
397 integer*4 nthreads,count,mtx,outeriter,inneriter
398 real*8 work(*)
400 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
401 integer*4 nn0,nn1,nouter,ninner
402 real*8 shX,shY,shZ
403 real*8 jq
404 real*8 qq,vcoul,vctot
405 integer*4 nti
406 integer*4 tj
407 real*8 Vvdw6,Vvdwtot
408 real*8 Vvdw12
409 real*8 r,rt,eps,eps2
410 integer*4 n0,nnn
411 real*8 Y,F,Geps,Heps2,Fp,VV
412 real*8 ix1,iy1,iz1
413 real*8 ix2,iy2,iz2
414 real*8 ix3,iy3,iz3
415 real*8 jx1,jy1,jz1
416 real*8 dx11,dy11,dz11,rsq11,rinv11
417 real*8 dx21,dy21,dz21,rsq21,rinv21
418 real*8 dx31,dy31,dz31,rsq31,rinv31
419 real*8 qO,qH
420 real*8 c6,c12
423 C Initialize water data
424 ii = iinr(1)+1
425 qO = facel*charge(ii)
426 qH = facel*charge(ii+1)
427 nti = 2*ntype*type(ii)
430 C Reset outer and inner iteration counters
431 nouter = 0
432 ninner = 0
434 C Loop over thread workunits
435 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
436 if(nn1.gt.nri) nn1=nri
438 C Start outer loop over neighborlists
440 do n=nn0+1,nn1
442 C Load shift vector for this list
443 is3 = 3*shift(n)+1
444 shX = shiftvec(is3)
445 shY = shiftvec(is3+1)
446 shZ = shiftvec(is3+2)
448 C Load limits for loop over neighbors
449 nj0 = jindex(n)+1
450 nj1 = jindex(n+1)
452 C Get outer coordinate index
453 ii = iinr(n)+1
454 ii3 = 3*ii-2
456 C Load i atom data, add shift vector
457 ix1 = shX + pos(ii3+0)
458 iy1 = shY + pos(ii3+1)
459 iz1 = shZ + pos(ii3+2)
460 ix2 = shX + pos(ii3+3)
461 iy2 = shY + pos(ii3+4)
462 iz2 = shZ + pos(ii3+5)
463 ix3 = shX + pos(ii3+6)
464 iy3 = shY + pos(ii3+7)
465 iz3 = shZ + pos(ii3+8)
467 C Zero the potential energy for this list
468 vctot = 0
469 Vvdwtot = 0
471 C Clear i atom forces
473 do k=nj0,nj1
475 C Get j neighbor index, and coordinate index
476 jnr = jjnr(k)+1
477 j3 = 3*jnr-2
479 C load j atom coordinates
480 jx1 = pos(j3+0)
481 jy1 = pos(j3+1)
482 jz1 = pos(j3+2)
484 C Calculate distance
485 dx11 = ix1 - jx1
486 dy11 = iy1 - jy1
487 dz11 = iz1 - jz1
488 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
489 dx21 = ix2 - jx1
490 dy21 = iy2 - jy1
491 dz21 = iz2 - jz1
492 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
493 dx31 = ix3 - jx1
494 dy31 = iy3 - jy1
495 dz31 = iz3 - jz1
496 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
498 C Calculate 1/r and 1/r2
499 rinv11 = 1.0/sqrt(rsq11)
500 rinv21 = 1.0/sqrt(rsq21)
501 rinv31 = 1.0/sqrt(rsq31)
503 C Load parameters for j atom
504 jq = charge(jnr+0)
505 qq = qO*jq
506 tj = nti+2*type(jnr)+1
507 c6 = vdwparam(tj)
508 c12 = vdwparam(tj+1)
510 C Coulomb interaction
511 vcoul = qq*rinv11
512 vctot = vctot+vcoul
514 C Calculate table index
515 r = rsq11*rinv11
517 C Calculate table index
518 rt = r*tabscale
519 n0 = rt
520 eps = rt-n0
521 eps2 = eps*eps
522 nnn = 8*n0+1
524 C Tabulated VdW interaction - dispersion
525 Y = VFtab(nnn)
526 F = VFtab(nnn+1)
527 Geps = eps*VFtab(nnn+2)
528 Heps2 = eps2*VFtab(nnn+3)
529 Fp = F+Geps+Heps2
530 VV = Y+eps*Fp
531 Vvdw6 = c6*VV
533 C Tabulated VdW interaction - repulsion
534 nnn = nnn+4
535 Y = VFtab(nnn)
536 F = VFtab(nnn+1)
537 Geps = eps*VFtab(nnn+2)
538 Heps2 = eps2*VFtab(nnn+3)
539 Fp = F+Geps+Heps2
540 VV = Y+eps*Fp
541 Vvdw12 = c12*VV
542 Vvdwtot = Vvdwtot+ Vvdw6 + Vvdw12
544 C Load parameters for j atom
545 qq = qH*jq
547 C Coulomb interaction
548 vcoul = qq*rinv21
549 vctot = vctot+vcoul
551 C Load parameters for j atom
553 C Coulomb interaction
554 vcoul = qq*rinv31
555 vctot = vctot+vcoul
557 C Inner loop uses 82 flops/iteration
558 end do
561 C Add i forces to mem and shifted force list
563 C Add potential energies to the group for this list
564 ggid = gid(n)+1
565 Vc(ggid) = Vc(ggid) + vctot
566 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
568 C Increment number of inner iterations
569 ninner = ninner + nj1 - nj0
571 C Outer loop uses 11 flops/iteration
572 end do
575 C Increment number of outer iterations
576 nouter = nouter + nn1 - nn0
577 if(nn1.lt.nri) goto 10
579 C Write outer/inner iteration count to pointers
580 outeriter = nouter
581 inneriter = ninner
582 return