Merge "only use CPU_COUNT if it's available" into release-4-6
[gromacs/AngularHB.git] / src / gmxlib / nonbonded / nb_kernel_f77_single / f77skernel303.f
blob1f33a77c443ea1e892d370068a2c56077ee6b5ba
2 C This source code is part of
4 C G R O M A C S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction. Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
32 C Gromacs nonbonded kernel f77skernel303
33 C Coulomb interaction: Tabulated
34 C VdW interaction: Not calculated
35 C water optimization: TIP4P - other atoms
36 C Calculate forces: yes
38 subroutine f77skernel303(
39 & nri,
40 & iinr,
41 & jindex,
42 & jjnr,
43 & shift,
44 & shiftvec,
45 & fshift,
46 & gid,
47 & pos,
48 & faction,
49 & charge,
50 & facel,
51 & krf,
52 & crf,
53 & Vc,
54 & type,
55 & ntype,
56 & vdwparam,
57 & Vvdw,
58 & tabscale,
59 & VFtab,
60 & invsqrta,
61 & dvda,
62 & gbtabscale,
63 & GBtab,
64 & nthreads,
65 & count,
66 & mtx,
67 & outeriter,
68 & inneriter,
69 & work)
70 implicit none
71 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
72 real*4 shiftvec(*),fshift(*),pos(*),faction(*)
73 integer*4 gid(*),type(*),ntype
74 real*4 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
75 real*4 Vvdw(*),tabscale,VFtab(*)
76 real*4 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
77 integer*4 nthreads,count,mtx,outeriter,inneriter
78 real*4 work(*)
80 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
81 integer*4 nn0,nn1,nouter,ninner
82 real*4 shX,shY,shZ
83 real*4 fscal,tx,ty,tz
84 real*4 jq
85 real*4 qq,vcoul,vctot
86 real*4 r,rt,eps,eps2
87 integer*4 n0,nnn
88 real*4 Y,F,Geps,Heps2,Fp,VV
89 real*4 FF
90 real*4 fijC
91 real*4 ix2,iy2,iz2,fix2,fiy2,fiz2
92 real*4 ix3,iy3,iz3,fix3,fiy3,fiz3
93 real*4 ix4,iy4,iz4,fix4,fiy4,fiz4
94 real*4 jx1,jy1,jz1,fjx1,fjy1,fjz1
95 real*4 dx21,dy21,dz21,rsq21,rinv21
96 real*4 dx31,dy31,dz31,rsq31,rinv31
97 real*4 dx41,dy41,dz41,rsq41,rinv41
98 real*4 qH,qM
101 C Initialize water data
102 ii = iinr(1)+1
103 qH = facel*charge(ii+1)
104 qM = facel*charge(ii+3)
107 C Reset outer and inner iteration counters
108 nouter = 0
109 ninner = 0
111 C Loop over thread workunits
112 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
113 if(nn1.gt.nri) nn1=nri
115 C Start outer loop over neighborlists
117 do n=nn0+1,nn1
119 C Load shift vector for this list
120 is3 = 3*shift(n)+1
121 shX = shiftvec(is3)
122 shY = shiftvec(is3+1)
123 shZ = shiftvec(is3+2)
125 C Load limits for loop over neighbors
126 nj0 = jindex(n)+1
127 nj1 = jindex(n+1)
129 C Get outer coordinate index
130 ii = iinr(n)+1
131 ii3 = 3*ii-2
133 C Load i atom data, add shift vector
134 ix2 = shX + pos(ii3+3)
135 iy2 = shY + pos(ii3+4)
136 iz2 = shZ + pos(ii3+5)
137 ix3 = shX + pos(ii3+6)
138 iy3 = shY + pos(ii3+7)
139 iz3 = shZ + pos(ii3+8)
140 ix4 = shX + pos(ii3+9)
141 iy4 = shY + pos(ii3+10)
142 iz4 = shZ + pos(ii3+11)
144 C Zero the potential energy for this list
145 vctot = 0
147 C Clear i atom forces
148 fix2 = 0
149 fiy2 = 0
150 fiz2 = 0
151 fix3 = 0
152 fiy3 = 0
153 fiz3 = 0
154 fix4 = 0
155 fiy4 = 0
156 fiz4 = 0
158 do k=nj0,nj1
160 C Get j neighbor index, and coordinate index
161 jnr = jjnr(k)+1
162 j3 = 3*jnr-2
164 C load j atom coordinates
165 jx1 = pos(j3+0)
166 jy1 = pos(j3+1)
167 jz1 = pos(j3+2)
169 C Calculate distance
170 dx21 = ix2 - jx1
171 dy21 = iy2 - jy1
172 dz21 = iz2 - jz1
173 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
174 dx31 = ix3 - jx1
175 dy31 = iy3 - jy1
176 dz31 = iz3 - jz1
177 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
178 dx41 = ix4 - jx1
179 dy41 = iy4 - jy1
180 dz41 = iz4 - jz1
181 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41
183 C Calculate 1/r and 1/r2
184 rinv21 = 1.0/sqrt(rsq21)
185 rinv31 = 1.0/sqrt(rsq31)
186 rinv41 = 1.0/sqrt(rsq41)
188 C Load parameters for j atom
189 jq = charge(jnr+0)
190 qq = qH*jq
192 C Calculate table index
193 r = rsq21*rinv21
195 C Calculate table index
196 rt = r*tabscale
197 n0 = rt
198 eps = rt-n0
199 eps2 = eps*eps
200 nnn = 4*n0+1
202 C Tabulated coulomb interaction
203 Y = VFtab(nnn)
204 F = VFtab(nnn+1)
205 Geps = eps*VFtab(nnn+2)
206 Heps2 = eps2*VFtab(nnn+3)
207 Fp = F+Geps+Heps2
208 VV = Y+eps*Fp
209 FF = Fp+Geps+2.0*Heps2
210 vcoul = qq*VV
211 fijC = qq*FF
212 vctot = vctot + vcoul
213 fscal = -((fijC)*tabscale)*rinv21
215 C Calculate temporary vectorial force
216 tx = fscal*dx21
217 ty = fscal*dy21
218 tz = fscal*dz21
220 C Increment i atom force
221 fix2 = fix2 + tx
222 fiy2 = fiy2 + ty
223 fiz2 = fiz2 + tz
225 C Decrement j atom force
226 fjx1 = faction(j3+0) - tx
227 fjy1 = faction(j3+1) - ty
228 fjz1 = faction(j3+2) - tz
230 C Load parameters for j atom
232 C Calculate table index
233 r = rsq31*rinv31
235 C Calculate table index
236 rt = r*tabscale
237 n0 = rt
238 eps = rt-n0
239 eps2 = eps*eps
240 nnn = 4*n0+1
242 C Tabulated coulomb interaction
243 Y = VFtab(nnn)
244 F = VFtab(nnn+1)
245 Geps = eps*VFtab(nnn+2)
246 Heps2 = eps2*VFtab(nnn+3)
247 Fp = F+Geps+Heps2
248 VV = Y+eps*Fp
249 FF = Fp+Geps+2.0*Heps2
250 vcoul = qq*VV
251 fijC = qq*FF
252 vctot = vctot + vcoul
253 fscal = -((fijC)*tabscale)*rinv31
255 C Calculate temporary vectorial force
256 tx = fscal*dx31
257 ty = fscal*dy31
258 tz = fscal*dz31
260 C Increment i atom force
261 fix3 = fix3 + tx
262 fiy3 = fiy3 + ty
263 fiz3 = fiz3 + tz
265 C Decrement j atom force
266 fjx1 = fjx1 - tx
267 fjy1 = fjy1 - ty
268 fjz1 = fjz1 - tz
270 C Load parameters for j atom
271 qq = qM*jq
273 C Calculate table index
274 r = rsq41*rinv41
276 C Calculate table index
277 rt = r*tabscale
278 n0 = rt
279 eps = rt-n0
280 eps2 = eps*eps
281 nnn = 4*n0+1
283 C Tabulated coulomb interaction
284 Y = VFtab(nnn)
285 F = VFtab(nnn+1)
286 Geps = eps*VFtab(nnn+2)
287 Heps2 = eps2*VFtab(nnn+3)
288 Fp = F+Geps+Heps2
289 VV = Y+eps*Fp
290 FF = Fp+Geps+2.0*Heps2
291 vcoul = qq*VV
292 fijC = qq*FF
293 vctot = vctot + vcoul
294 fscal = -((fijC)*tabscale)*rinv41
296 C Calculate temporary vectorial force
297 tx = fscal*dx41
298 ty = fscal*dy41
299 tz = fscal*dz41
301 C Increment i atom force
302 fix4 = fix4 + tx
303 fiy4 = fiy4 + ty
304 fiz4 = fiz4 + tz
306 C Decrement j atom force
307 faction(j3+0) = fjx1 - tx
308 faction(j3+1) = fjy1 - ty
309 faction(j3+2) = fjz1 - tz
311 C Inner loop uses 125 flops/iteration
312 end do
315 C Add i forces to mem and shifted force list
316 faction(ii3+3) = faction(ii3+3) + fix2
317 faction(ii3+4) = faction(ii3+4) + fiy2
318 faction(ii3+5) = faction(ii3+5) + fiz2
319 faction(ii3+6) = faction(ii3+6) + fix3
320 faction(ii3+7) = faction(ii3+7) + fiy3
321 faction(ii3+8) = faction(ii3+8) + fiz3
322 faction(ii3+9) = faction(ii3+9) + fix4
323 faction(ii3+10) = faction(ii3+10) + fiy4
324 faction(ii3+11) = faction(ii3+11) + fiz4
325 fshift(is3) = fshift(is3)+fix2+fix3+fix4
326 fshift(is3+1) = fshift(is3+1)+fiy2+fiy3+fiy4
327 fshift(is3+2) = fshift(is3+2)+fiz2+fiz3+fiz4
329 C Add potential energies to the group for this list
330 ggid = gid(n)+1
331 Vc(ggid) = Vc(ggid) + vctot
333 C Increment number of inner iterations
334 ninner = ninner + nj1 - nj0
336 C Outer loop uses 28 flops/iteration
337 end do
340 C Increment number of outer iterations
341 nouter = nouter + nn1 - nn0
342 if(nn1.lt.nri) goto 10
344 C Write outer/inner iteration count to pointers
345 outeriter = nouter
346 inneriter = ninner
347 return
356 C Gromacs nonbonded kernel f77skernel303nf
357 C Coulomb interaction: Tabulated
358 C VdW interaction: Not calculated
359 C water optimization: TIP4P - other atoms
360 C Calculate forces: no
362 subroutine f77skernel303nf(
363 & nri,
364 & iinr,
365 & jindex,
366 & jjnr,
367 & shift,
368 & shiftvec,
369 & fshift,
370 & gid,
371 & pos,
372 & faction,
373 & charge,
374 & facel,
375 & krf,
376 & crf,
377 & Vc,
378 & type,
379 & ntype,
380 & vdwparam,
381 & Vvdw,
382 & tabscale,
383 & VFtab,
384 & invsqrta,
385 & dvda,
386 & gbtabscale,
387 & GBtab,
388 & nthreads,
389 & count,
390 & mtx,
391 & outeriter,
392 & inneriter,
393 & work)
394 implicit none
395 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
396 real*4 shiftvec(*),fshift(*),pos(*),faction(*)
397 integer*4 gid(*),type(*),ntype
398 real*4 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
399 real*4 Vvdw(*),tabscale,VFtab(*)
400 real*4 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
401 integer*4 nthreads,count,mtx,outeriter,inneriter
402 real*4 work(*)
404 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
405 integer*4 nn0,nn1,nouter,ninner
406 real*4 shX,shY,shZ
407 real*4 jq
408 real*4 qq,vcoul,vctot
409 real*4 r,rt,eps,eps2
410 integer*4 n0,nnn
411 real*4 Y,F,Geps,Heps2,Fp,VV
412 real*4 ix2,iy2,iz2
413 real*4 ix3,iy3,iz3
414 real*4 ix4,iy4,iz4
415 real*4 jx1,jy1,jz1
416 real*4 dx21,dy21,dz21,rsq21,rinv21
417 real*4 dx31,dy31,dz31,rsq31,rinv31
418 real*4 dx41,dy41,dz41,rsq41,rinv41
419 real*4 qH,qM
422 C Initialize water data
423 ii = iinr(1)+1
424 qH = facel*charge(ii+1)
425 qM = facel*charge(ii+3)
428 C Reset outer and inner iteration counters
429 nouter = 0
430 ninner = 0
432 C Loop over thread workunits
433 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
434 if(nn1.gt.nri) nn1=nri
436 C Start outer loop over neighborlists
438 do n=nn0+1,nn1
440 C Load shift vector for this list
441 is3 = 3*shift(n)+1
442 shX = shiftvec(is3)
443 shY = shiftvec(is3+1)
444 shZ = shiftvec(is3+2)
446 C Load limits for loop over neighbors
447 nj0 = jindex(n)+1
448 nj1 = jindex(n+1)
450 C Get outer coordinate index
451 ii = iinr(n)+1
452 ii3 = 3*ii-2
454 C Load i atom data, add shift vector
455 ix2 = shX + pos(ii3+3)
456 iy2 = shY + pos(ii3+4)
457 iz2 = shZ + pos(ii3+5)
458 ix3 = shX + pos(ii3+6)
459 iy3 = shY + pos(ii3+7)
460 iz3 = shZ + pos(ii3+8)
461 ix4 = shX + pos(ii3+9)
462 iy4 = shY + pos(ii3+10)
463 iz4 = shZ + pos(ii3+11)
465 C Zero the potential energy for this list
466 vctot = 0
468 C Clear i atom forces
470 do k=nj0,nj1
472 C Get j neighbor index, and coordinate index
473 jnr = jjnr(k)+1
474 j3 = 3*jnr-2
476 C load j atom coordinates
477 jx1 = pos(j3+0)
478 jy1 = pos(j3+1)
479 jz1 = pos(j3+2)
481 C Calculate distance
482 dx21 = ix2 - jx1
483 dy21 = iy2 - jy1
484 dz21 = iz2 - jz1
485 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
486 dx31 = ix3 - jx1
487 dy31 = iy3 - jy1
488 dz31 = iz3 - jz1
489 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
490 dx41 = ix4 - jx1
491 dy41 = iy4 - jy1
492 dz41 = iz4 - jz1
493 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41
495 C Calculate 1/r and 1/r2
496 rinv21 = 1.0/sqrt(rsq21)
497 rinv31 = 1.0/sqrt(rsq31)
498 rinv41 = 1.0/sqrt(rsq41)
500 C Load parameters for j atom
501 jq = charge(jnr+0)
502 qq = qH*jq
504 C Calculate table index
505 r = rsq21*rinv21
507 C Calculate table index
508 rt = r*tabscale
509 n0 = rt
510 eps = rt-n0
511 eps2 = eps*eps
512 nnn = 4*n0+1
514 C Tabulated coulomb interaction
515 Y = VFtab(nnn)
516 F = VFtab(nnn+1)
517 Geps = eps*VFtab(nnn+2)
518 Heps2 = eps2*VFtab(nnn+3)
519 Fp = F+Geps+Heps2
520 VV = Y+eps*Fp
521 vcoul = qq*VV
522 vctot = vctot + vcoul
524 C Load parameters for j atom
526 C Calculate table index
527 r = rsq31*rinv31
529 C Calculate table index
530 rt = r*tabscale
531 n0 = rt
532 eps = rt-n0
533 eps2 = eps*eps
534 nnn = 4*n0+1
536 C Tabulated coulomb interaction
537 Y = VFtab(nnn)
538 F = VFtab(nnn+1)
539 Geps = eps*VFtab(nnn+2)
540 Heps2 = eps2*VFtab(nnn+3)
541 Fp = F+Geps+Heps2
542 VV = Y+eps*Fp
543 vcoul = qq*VV
544 vctot = vctot + vcoul
546 C Load parameters for j atom
547 qq = qM*jq
549 C Calculate table index
550 r = rsq41*rinv41
552 C Calculate table index
553 rt = r*tabscale
554 n0 = rt
555 eps = rt-n0
556 eps2 = eps*eps
557 nnn = 4*n0+1
559 C Tabulated coulomb interaction
560 Y = VFtab(nnn)
561 F = VFtab(nnn+1)
562 Geps = eps*VFtab(nnn+2)
563 Heps2 = eps2*VFtab(nnn+3)
564 Fp = F+Geps+Heps2
565 VV = Y+eps*Fp
566 vcoul = qq*VV
567 vctot = vctot + vcoul
569 C Inner loop uses 77 flops/iteration
570 end do
573 C Add i forces to mem and shifted force list
575 C Add potential energies to the group for this list
576 ggid = gid(n)+1
577 Vc(ggid) = Vc(ggid) + vctot
579 C Increment number of inner iterations
580 ninner = ninner + nj1 - nj0
582 C Outer loop uses 10 flops/iteration
583 end do
586 C Increment number of outer iterations
587 nouter = nouter + nn1 - nn0
588 if(nn1.lt.nri) goto 10
590 C Write outer/inner iteration count to pointers
591 outeriter = nouter
592 inneriter = ninner
593 return