Merge "only use CPU_COUNT if it's available" into release-4-6
[gromacs/AngularHB.git] / src / gmxlib / nonbonded / nb_kernel_f77_single / f77skernel301.f
blob22c7441faca555656f65e0f0c271f566f3baaae9
2 C This source code is part of
4 C G R O M A C S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction. Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
32 C Gromacs nonbonded kernel f77skernel301
33 C Coulomb interaction: Tabulated
34 C VdW interaction: Not calculated
35 C water optimization: SPC/TIP3P - other atoms
36 C Calculate forces: yes
38 subroutine f77skernel301(
39 & nri,
40 & iinr,
41 & jindex,
42 & jjnr,
43 & shift,
44 & shiftvec,
45 & fshift,
46 & gid,
47 & pos,
48 & faction,
49 & charge,
50 & facel,
51 & krf,
52 & crf,
53 & Vc,
54 & type,
55 & ntype,
56 & vdwparam,
57 & Vvdw,
58 & tabscale,
59 & VFtab,
60 & invsqrta,
61 & dvda,
62 & gbtabscale,
63 & GBtab,
64 & nthreads,
65 & count,
66 & mtx,
67 & outeriter,
68 & inneriter,
69 & work)
70 implicit none
71 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
72 real*4 shiftvec(*),fshift(*),pos(*),faction(*)
73 integer*4 gid(*),type(*),ntype
74 real*4 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
75 real*4 Vvdw(*),tabscale,VFtab(*)
76 real*4 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
77 integer*4 nthreads,count,mtx,outeriter,inneriter
78 real*4 work(*)
80 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
81 integer*4 nn0,nn1,nouter,ninner
82 real*4 shX,shY,shZ
83 real*4 fscal,tx,ty,tz
84 real*4 jq
85 real*4 qq,vcoul,vctot
86 real*4 r,rt,eps,eps2
87 integer*4 n0,nnn
88 real*4 Y,F,Geps,Heps2,Fp,VV
89 real*4 FF
90 real*4 fijC
91 real*4 ix1,iy1,iz1,fix1,fiy1,fiz1
92 real*4 ix2,iy2,iz2,fix2,fiy2,fiz2
93 real*4 ix3,iy3,iz3,fix3,fiy3,fiz3
94 real*4 jx1,jy1,jz1,fjx1,fjy1,fjz1
95 real*4 dx11,dy11,dz11,rsq11,rinv11
96 real*4 dx21,dy21,dz21,rsq21,rinv21
97 real*4 dx31,dy31,dz31,rsq31,rinv31
98 real*4 qO,qH
101 C Initialize water data
102 ii = iinr(1)+1
103 qO = facel*charge(ii)
104 qH = facel*charge(ii+1)
107 C Reset outer and inner iteration counters
108 nouter = 0
109 ninner = 0
111 C Loop over thread workunits
112 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
113 if(nn1.gt.nri) nn1=nri
115 C Start outer loop over neighborlists
117 do n=nn0+1,nn1
119 C Load shift vector for this list
120 is3 = 3*shift(n)+1
121 shX = shiftvec(is3)
122 shY = shiftvec(is3+1)
123 shZ = shiftvec(is3+2)
125 C Load limits for loop over neighbors
126 nj0 = jindex(n)+1
127 nj1 = jindex(n+1)
129 C Get outer coordinate index
130 ii = iinr(n)+1
131 ii3 = 3*ii-2
133 C Load i atom data, add shift vector
134 ix1 = shX + pos(ii3+0)
135 iy1 = shY + pos(ii3+1)
136 iz1 = shZ + pos(ii3+2)
137 ix2 = shX + pos(ii3+3)
138 iy2 = shY + pos(ii3+4)
139 iz2 = shZ + pos(ii3+5)
140 ix3 = shX + pos(ii3+6)
141 iy3 = shY + pos(ii3+7)
142 iz3 = shZ + pos(ii3+8)
144 C Zero the potential energy for this list
145 vctot = 0
147 C Clear i atom forces
148 fix1 = 0
149 fiy1 = 0
150 fiz1 = 0
151 fix2 = 0
152 fiy2 = 0
153 fiz2 = 0
154 fix3 = 0
155 fiy3 = 0
156 fiz3 = 0
158 do k=nj0,nj1
160 C Get j neighbor index, and coordinate index
161 jnr = jjnr(k)+1
162 j3 = 3*jnr-2
164 C load j atom coordinates
165 jx1 = pos(j3+0)
166 jy1 = pos(j3+1)
167 jz1 = pos(j3+2)
169 C Calculate distance
170 dx11 = ix1 - jx1
171 dy11 = iy1 - jy1
172 dz11 = iz1 - jz1
173 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
174 dx21 = ix2 - jx1
175 dy21 = iy2 - jy1
176 dz21 = iz2 - jz1
177 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
178 dx31 = ix3 - jx1
179 dy31 = iy3 - jy1
180 dz31 = iz3 - jz1
181 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
183 C Calculate 1/r and 1/r2
184 rinv11 = 1.0/sqrt(rsq11)
185 rinv21 = 1.0/sqrt(rsq21)
186 rinv31 = 1.0/sqrt(rsq31)
188 C Load parameters for j atom
189 jq = charge(jnr+0)
190 qq = qO*jq
192 C Calculate table index
193 r = rsq11*rinv11
195 C Calculate table index
196 rt = r*tabscale
197 n0 = rt
198 eps = rt-n0
199 eps2 = eps*eps
200 nnn = 4*n0+1
202 C Tabulated coulomb interaction
203 Y = VFtab(nnn)
204 F = VFtab(nnn+1)
205 Geps = eps*VFtab(nnn+2)
206 Heps2 = eps2*VFtab(nnn+3)
207 Fp = F+Geps+Heps2
208 VV = Y+eps*Fp
209 FF = Fp+Geps+2.0*Heps2
210 vcoul = qq*VV
211 fijC = qq*FF
212 vctot = vctot + vcoul
213 fscal = -((fijC)*tabscale)*rinv11
215 C Calculate temporary vectorial force
216 tx = fscal*dx11
217 ty = fscal*dy11
218 tz = fscal*dz11
220 C Increment i atom force
221 fix1 = fix1 + tx
222 fiy1 = fiy1 + ty
223 fiz1 = fiz1 + tz
225 C Decrement j atom force
226 fjx1 = faction(j3+0) - tx
227 fjy1 = faction(j3+1) - ty
228 fjz1 = faction(j3+2) - tz
230 C Load parameters for j atom
231 qq = qH*jq
233 C Calculate table index
234 r = rsq21*rinv21
236 C Calculate table index
237 rt = r*tabscale
238 n0 = rt
239 eps = rt-n0
240 eps2 = eps*eps
241 nnn = 4*n0+1
243 C Tabulated coulomb interaction
244 Y = VFtab(nnn)
245 F = VFtab(nnn+1)
246 Geps = eps*VFtab(nnn+2)
247 Heps2 = eps2*VFtab(nnn+3)
248 Fp = F+Geps+Heps2
249 VV = Y+eps*Fp
250 FF = Fp+Geps+2.0*Heps2
251 vcoul = qq*VV
252 fijC = qq*FF
253 vctot = vctot + vcoul
254 fscal = -((fijC)*tabscale)*rinv21
256 C Calculate temporary vectorial force
257 tx = fscal*dx21
258 ty = fscal*dy21
259 tz = fscal*dz21
261 C Increment i atom force
262 fix2 = fix2 + tx
263 fiy2 = fiy2 + ty
264 fiz2 = fiz2 + tz
266 C Decrement j atom force
267 fjx1 = fjx1 - tx
268 fjy1 = fjy1 - ty
269 fjz1 = fjz1 - tz
271 C Load parameters for j atom
273 C Calculate table index
274 r = rsq31*rinv31
276 C Calculate table index
277 rt = r*tabscale
278 n0 = rt
279 eps = rt-n0
280 eps2 = eps*eps
281 nnn = 4*n0+1
283 C Tabulated coulomb interaction
284 Y = VFtab(nnn)
285 F = VFtab(nnn+1)
286 Geps = eps*VFtab(nnn+2)
287 Heps2 = eps2*VFtab(nnn+3)
288 Fp = F+Geps+Heps2
289 VV = Y+eps*Fp
290 FF = Fp+Geps+2.0*Heps2
291 vcoul = qq*VV
292 fijC = qq*FF
293 vctot = vctot + vcoul
294 fscal = -((fijC)*tabscale)*rinv31
296 C Calculate temporary vectorial force
297 tx = fscal*dx31
298 ty = fscal*dy31
299 tz = fscal*dz31
301 C Increment i atom force
302 fix3 = fix3 + tx
303 fiy3 = fiy3 + ty
304 fiz3 = fiz3 + tz
306 C Decrement j atom force
307 faction(j3+0) = fjx1 - tx
308 faction(j3+1) = fjy1 - ty
309 faction(j3+2) = fjz1 - tz
311 C Inner loop uses 125 flops/iteration
312 end do
315 C Add i forces to mem and shifted force list
316 faction(ii3+0) = faction(ii3+0) + fix1
317 faction(ii3+1) = faction(ii3+1) + fiy1
318 faction(ii3+2) = faction(ii3+2) + fiz1
319 faction(ii3+3) = faction(ii3+3) + fix2
320 faction(ii3+4) = faction(ii3+4) + fiy2
321 faction(ii3+5) = faction(ii3+5) + fiz2
322 faction(ii3+6) = faction(ii3+6) + fix3
323 faction(ii3+7) = faction(ii3+7) + fiy3
324 faction(ii3+8) = faction(ii3+8) + fiz3
325 fshift(is3) = fshift(is3)+fix1+fix2+fix3
326 fshift(is3+1) = fshift(is3+1)+fiy1+fiy2+fiy3
327 fshift(is3+2) = fshift(is3+2)+fiz1+fiz2+fiz3
329 C Add potential energies to the group for this list
330 ggid = gid(n)+1
331 Vc(ggid) = Vc(ggid) + vctot
333 C Increment number of inner iterations
334 ninner = ninner + nj1 - nj0
336 C Outer loop uses 28 flops/iteration
337 end do
340 C Increment number of outer iterations
341 nouter = nouter + nn1 - nn0
342 if(nn1.lt.nri) goto 10
344 C Write outer/inner iteration count to pointers
345 outeriter = nouter
346 inneriter = ninner
347 return
356 C Gromacs nonbonded kernel f77skernel301nf
357 C Coulomb interaction: Tabulated
358 C VdW interaction: Not calculated
359 C water optimization: SPC/TIP3P - other atoms
360 C Calculate forces: no
362 subroutine f77skernel301nf(
363 & nri,
364 & iinr,
365 & jindex,
366 & jjnr,
367 & shift,
368 & shiftvec,
369 & fshift,
370 & gid,
371 & pos,
372 & faction,
373 & charge,
374 & facel,
375 & krf,
376 & crf,
377 & Vc,
378 & type,
379 & ntype,
380 & vdwparam,
381 & Vvdw,
382 & tabscale,
383 & VFtab,
384 & invsqrta,
385 & dvda,
386 & gbtabscale,
387 & GBtab,
388 & nthreads,
389 & count,
390 & mtx,
391 & outeriter,
392 & inneriter,
393 & work)
394 implicit none
395 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
396 real*4 shiftvec(*),fshift(*),pos(*),faction(*)
397 integer*4 gid(*),type(*),ntype
398 real*4 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
399 real*4 Vvdw(*),tabscale,VFtab(*)
400 real*4 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
401 integer*4 nthreads,count,mtx,outeriter,inneriter
402 real*4 work(*)
404 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
405 integer*4 nn0,nn1,nouter,ninner
406 real*4 shX,shY,shZ
407 real*4 jq
408 real*4 qq,vcoul,vctot
409 real*4 r,rt,eps,eps2
410 integer*4 n0,nnn
411 real*4 Y,F,Geps,Heps2,Fp,VV
412 real*4 ix1,iy1,iz1
413 real*4 ix2,iy2,iz2
414 real*4 ix3,iy3,iz3
415 real*4 jx1,jy1,jz1
416 real*4 dx11,dy11,dz11,rsq11,rinv11
417 real*4 dx21,dy21,dz21,rsq21,rinv21
418 real*4 dx31,dy31,dz31,rsq31,rinv31
419 real*4 qO,qH
422 C Initialize water data
423 ii = iinr(1)+1
424 qO = facel*charge(ii)
425 qH = facel*charge(ii+1)
428 C Reset outer and inner iteration counters
429 nouter = 0
430 ninner = 0
432 C Loop over thread workunits
433 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
434 if(nn1.gt.nri) nn1=nri
436 C Start outer loop over neighborlists
438 do n=nn0+1,nn1
440 C Load shift vector for this list
441 is3 = 3*shift(n)+1
442 shX = shiftvec(is3)
443 shY = shiftvec(is3+1)
444 shZ = shiftvec(is3+2)
446 C Load limits for loop over neighbors
447 nj0 = jindex(n)+1
448 nj1 = jindex(n+1)
450 C Get outer coordinate index
451 ii = iinr(n)+1
452 ii3 = 3*ii-2
454 C Load i atom data, add shift vector
455 ix1 = shX + pos(ii3+0)
456 iy1 = shY + pos(ii3+1)
457 iz1 = shZ + pos(ii3+2)
458 ix2 = shX + pos(ii3+3)
459 iy2 = shY + pos(ii3+4)
460 iz2 = shZ + pos(ii3+5)
461 ix3 = shX + pos(ii3+6)
462 iy3 = shY + pos(ii3+7)
463 iz3 = shZ + pos(ii3+8)
465 C Zero the potential energy for this list
466 vctot = 0
468 C Clear i atom forces
470 do k=nj0,nj1
472 C Get j neighbor index, and coordinate index
473 jnr = jjnr(k)+1
474 j3 = 3*jnr-2
476 C load j atom coordinates
477 jx1 = pos(j3+0)
478 jy1 = pos(j3+1)
479 jz1 = pos(j3+2)
481 C Calculate distance
482 dx11 = ix1 - jx1
483 dy11 = iy1 - jy1
484 dz11 = iz1 - jz1
485 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
486 dx21 = ix2 - jx1
487 dy21 = iy2 - jy1
488 dz21 = iz2 - jz1
489 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
490 dx31 = ix3 - jx1
491 dy31 = iy3 - jy1
492 dz31 = iz3 - jz1
493 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
495 C Calculate 1/r and 1/r2
496 rinv11 = 1.0/sqrt(rsq11)
497 rinv21 = 1.0/sqrt(rsq21)
498 rinv31 = 1.0/sqrt(rsq31)
500 C Load parameters for j atom
501 jq = charge(jnr+0)
502 qq = qO*jq
504 C Calculate table index
505 r = rsq11*rinv11
507 C Calculate table index
508 rt = r*tabscale
509 n0 = rt
510 eps = rt-n0
511 eps2 = eps*eps
512 nnn = 4*n0+1
514 C Tabulated coulomb interaction
515 Y = VFtab(nnn)
516 F = VFtab(nnn+1)
517 Geps = eps*VFtab(nnn+2)
518 Heps2 = eps2*VFtab(nnn+3)
519 Fp = F+Geps+Heps2
520 VV = Y+eps*Fp
521 vcoul = qq*VV
522 vctot = vctot + vcoul
524 C Load parameters for j atom
525 qq = qH*jq
527 C Calculate table index
528 r = rsq21*rinv21
530 C Calculate table index
531 rt = r*tabscale
532 n0 = rt
533 eps = rt-n0
534 eps2 = eps*eps
535 nnn = 4*n0+1
537 C Tabulated coulomb interaction
538 Y = VFtab(nnn)
539 F = VFtab(nnn+1)
540 Geps = eps*VFtab(nnn+2)
541 Heps2 = eps2*VFtab(nnn+3)
542 Fp = F+Geps+Heps2
543 VV = Y+eps*Fp
544 vcoul = qq*VV
545 vctot = vctot + vcoul
547 C Load parameters for j atom
549 C Calculate table index
550 r = rsq31*rinv31
552 C Calculate table index
553 rt = r*tabscale
554 n0 = rt
555 eps = rt-n0
556 eps2 = eps*eps
557 nnn = 4*n0+1
559 C Tabulated coulomb interaction
560 Y = VFtab(nnn)
561 F = VFtab(nnn+1)
562 Geps = eps*VFtab(nnn+2)
563 Heps2 = eps2*VFtab(nnn+3)
564 Fp = F+Geps+Heps2
565 VV = Y+eps*Fp
566 vcoul = qq*VV
567 vctot = vctot + vcoul
569 C Inner loop uses 77 flops/iteration
570 end do
573 C Add i forces to mem and shifted force list
575 C Add potential energies to the group for this list
576 ggid = gid(n)+1
577 Vc(ggid) = Vc(ggid) + vctot
579 C Increment number of inner iterations
580 ninner = ninner + nj1 - nj0
582 C Outer loop uses 10 flops/iteration
583 end do
586 C Increment number of outer iterations
587 nouter = nouter + nn1 - nn0
588 if(nn1.lt.nri) goto 10
590 C Write outer/inner iteration count to pointers
591 outeriter = nouter
592 inneriter = ninner
593 return