Merge "only use CPU_COUNT if it's available" into release-4-6
[gromacs/AngularHB.git] / src / gmxlib / nonbonded / nb_kernel_f77_single / f77skernel123.f
blob83c731d15f6702fae102f0a5336d4d7633ded928
2 C This source code is part of
4 C G R O M A C S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction. Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
32 C Gromacs nonbonded kernel f77skernel123
33 C Coulomb interaction: Normal Coulomb
34 C VdW interaction: Buckingham
35 C water optimization: TIP4P - other atoms
36 C Calculate forces: yes
38 subroutine f77skernel123(
39 & nri,
40 & iinr,
41 & jindex,
42 & jjnr,
43 & shift,
44 & shiftvec,
45 & fshift,
46 & gid,
47 & pos,
48 & faction,
49 & charge,
50 & facel,
51 & krf,
52 & crf,
53 & Vc,
54 & type,
55 & ntype,
56 & vdwparam,
57 & Vvdw,
58 & tabscale,
59 & VFtab,
60 & invsqrta,
61 & dvda,
62 & gbtabscale,
63 & GBtab,
64 & nthreads,
65 & count,
66 & mtx,
67 & outeriter,
68 & inneriter,
69 & work)
70 implicit none
71 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
72 real*4 shiftvec(*),fshift(*),pos(*),faction(*)
73 integer*4 gid(*),type(*),ntype
74 real*4 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
75 real*4 Vvdw(*),tabscale,VFtab(*)
76 real*4 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
77 integer*4 nthreads,count,mtx,outeriter,inneriter
78 real*4 work(*)
80 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
81 integer*4 nn0,nn1,nouter,ninner
82 real*4 shX,shY,shZ
83 real*4 fscal,tx,ty,tz
84 real*4 rinvsq
85 real*4 jq
86 real*4 qq,vcoul,vctot
87 integer*4 nti
88 integer*4 tj
89 real*4 rinvsix
90 real*4 Vvdw6,Vvdwtot
91 real*4 Vvdwexp,br
92 real*4 ix1,iy1,iz1,fix1,fiy1,fiz1
93 real*4 ix2,iy2,iz2,fix2,fiy2,fiz2
94 real*4 ix3,iy3,iz3,fix3,fiy3,fiz3
95 real*4 ix4,iy4,iz4,fix4,fiy4,fiz4
96 real*4 jx1,jy1,jz1,fjx1,fjy1,fjz1
97 real*4 dx11,dy11,dz11,rsq11,rinv11
98 real*4 dx21,dy21,dz21,rsq21,rinv21
99 real*4 dx31,dy31,dz31,rsq31,rinv31
100 real*4 dx41,dy41,dz41,rsq41,rinv41
101 real*4 qH,qM
102 real*4 c6,cexp1,cexp2
105 C Initialize water data
106 ii = iinr(1)+1
107 qH = facel*charge(ii+1)
108 qM = facel*charge(ii+3)
109 nti = 3*ntype*type(ii)
112 C Reset outer and inner iteration counters
113 nouter = 0
114 ninner = 0
116 C Loop over thread workunits
117 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
118 if(nn1.gt.nri) nn1=nri
120 C Start outer loop over neighborlists
122 do n=nn0+1,nn1
124 C Load shift vector for this list
125 is3 = 3*shift(n)+1
126 shX = shiftvec(is3)
127 shY = shiftvec(is3+1)
128 shZ = shiftvec(is3+2)
130 C Load limits for loop over neighbors
131 nj0 = jindex(n)+1
132 nj1 = jindex(n+1)
134 C Get outer coordinate index
135 ii = iinr(n)+1
136 ii3 = 3*ii-2
138 C Load i atom data, add shift vector
139 ix1 = shX + pos(ii3+0)
140 iy1 = shY + pos(ii3+1)
141 iz1 = shZ + pos(ii3+2)
142 ix2 = shX + pos(ii3+3)
143 iy2 = shY + pos(ii3+4)
144 iz2 = shZ + pos(ii3+5)
145 ix3 = shX + pos(ii3+6)
146 iy3 = shY + pos(ii3+7)
147 iz3 = shZ + pos(ii3+8)
148 ix4 = shX + pos(ii3+9)
149 iy4 = shY + pos(ii3+10)
150 iz4 = shZ + pos(ii3+11)
152 C Zero the potential energy for this list
153 vctot = 0
154 Vvdwtot = 0
156 C Clear i atom forces
157 fix1 = 0
158 fiy1 = 0
159 fiz1 = 0
160 fix2 = 0
161 fiy2 = 0
162 fiz2 = 0
163 fix3 = 0
164 fiy3 = 0
165 fiz3 = 0
166 fix4 = 0
167 fiy4 = 0
168 fiz4 = 0
170 do k=nj0,nj1
172 C Get j neighbor index, and coordinate index
173 jnr = jjnr(k)+1
174 j3 = 3*jnr-2
176 C load j atom coordinates
177 jx1 = pos(j3+0)
178 jy1 = pos(j3+1)
179 jz1 = pos(j3+2)
181 C Calculate distance
182 dx11 = ix1 - jx1
183 dy11 = iy1 - jy1
184 dz11 = iz1 - jz1
185 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
186 dx21 = ix2 - jx1
187 dy21 = iy2 - jy1
188 dz21 = iz2 - jz1
189 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
190 dx31 = ix3 - jx1
191 dy31 = iy3 - jy1
192 dz31 = iz3 - jz1
193 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
194 dx41 = ix4 - jx1
195 dy41 = iy4 - jy1
196 dz41 = iz4 - jz1
197 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41
199 C Calculate 1/r and 1/r2
200 rinv11 = 1.0/sqrt(rsq11)
201 rinv21 = 1.0/sqrt(rsq21)
202 rinv31 = 1.0/sqrt(rsq31)
203 rinv41 = 1.0/sqrt(rsq41)
205 C Load parameters for j atom
206 tj = nti+3*type(jnr)+1
207 c6 = vdwparam(tj)
208 cexp1 = vdwparam(tj+1)
209 cexp2 = vdwparam(tj+2)
210 rinvsq = rinv11*rinv11
212 C Buckingham interaction
213 rinvsix = rinvsq*rinvsq*rinvsq
214 Vvdw6 = c6*rinvsix
215 br = cexp2*rsq11*rinv11
216 Vvdwexp = cexp1*exp(-br)
217 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6
218 fscal = (br*Vvdwexp-6.0*Vvdw6)*rinvsq
220 C Calculate temporary vectorial force
221 tx = fscal*dx11
222 ty = fscal*dy11
223 tz = fscal*dz11
225 C Increment i atom force
226 fix1 = fix1 + tx
227 fiy1 = fiy1 + ty
228 fiz1 = fiz1 + tz
230 C Decrement j atom force
231 fjx1 = faction(j3+0) - tx
232 fjy1 = faction(j3+1) - ty
233 fjz1 = faction(j3+2) - tz
235 C Load parameters for j atom
236 jq = charge(jnr+0)
237 qq = qH*jq
238 rinvsq = rinv21*rinv21
240 C Coulomb interaction
241 vcoul = qq*rinv21
242 vctot = vctot+vcoul
243 fscal = (vcoul)*rinvsq
245 C Calculate temporary vectorial force
246 tx = fscal*dx21
247 ty = fscal*dy21
248 tz = fscal*dz21
250 C Increment i atom force
251 fix2 = fix2 + tx
252 fiy2 = fiy2 + ty
253 fiz2 = fiz2 + tz
255 C Decrement j atom force
256 fjx1 = fjx1 - tx
257 fjy1 = fjy1 - ty
258 fjz1 = fjz1 - tz
260 C Load parameters for j atom
261 rinvsq = rinv31*rinv31
263 C Coulomb interaction
264 vcoul = qq*rinv31
265 vctot = vctot+vcoul
266 fscal = (vcoul)*rinvsq
268 C Calculate temporary vectorial force
269 tx = fscal*dx31
270 ty = fscal*dy31
271 tz = fscal*dz31
273 C Increment i atom force
274 fix3 = fix3 + tx
275 fiy3 = fiy3 + ty
276 fiz3 = fiz3 + tz
278 C Decrement j atom force
279 fjx1 = fjx1 - tx
280 fjy1 = fjy1 - ty
281 fjz1 = fjz1 - tz
283 C Load parameters for j atom
284 qq = qM*jq
285 rinvsq = rinv41*rinv41
287 C Coulomb interaction
288 vcoul = qq*rinv41
289 vctot = vctot+vcoul
290 fscal = (vcoul)*rinvsq
292 C Calculate temporary vectorial force
293 tx = fscal*dx41
294 ty = fscal*dy41
295 tz = fscal*dz41
297 C Increment i atom force
298 fix4 = fix4 + tx
299 fiy4 = fiy4 + ty
300 fiz4 = fiz4 + tz
302 C Decrement j atom force
303 faction(j3+0) = fjx1 - tx
304 faction(j3+1) = fjy1 - ty
305 faction(j3+2) = fjz1 - tz
307 C Inner loop uses 141 flops/iteration
308 end do
311 C Add i forces to mem and shifted force list
312 faction(ii3+0) = faction(ii3+0) + fix1
313 faction(ii3+1) = faction(ii3+1) + fiy1
314 faction(ii3+2) = faction(ii3+2) + fiz1
315 faction(ii3+3) = faction(ii3+3) + fix2
316 faction(ii3+4) = faction(ii3+4) + fiy2
317 faction(ii3+5) = faction(ii3+5) + fiz2
318 faction(ii3+6) = faction(ii3+6) + fix3
319 faction(ii3+7) = faction(ii3+7) + fiy3
320 faction(ii3+8) = faction(ii3+8) + fiz3
321 faction(ii3+9) = faction(ii3+9) + fix4
322 faction(ii3+10) = faction(ii3+10) + fiy4
323 faction(ii3+11) = faction(ii3+11) + fiz4
324 fshift(is3) = fshift(is3)+fix1+fix2+fix3+fix4
325 fshift(is3+1) = fshift(is3+1)+fiy1+fiy2+fiy3+fiy4
326 fshift(is3+2) = fshift(is3+2)+fiz1+fiz2+fiz3+fiz4
328 C Add potential energies to the group for this list
329 ggid = gid(n)+1
330 Vc(ggid) = Vc(ggid) + vctot
331 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
333 C Increment number of inner iterations
334 ninner = ninner + nj1 - nj0
336 C Outer loop uses 38 flops/iteration
337 end do
340 C Increment number of outer iterations
341 nouter = nouter + nn1 - nn0
342 if(nn1.lt.nri) goto 10
344 C Write outer/inner iteration count to pointers
345 outeriter = nouter
346 inneriter = ninner
347 return
356 C Gromacs nonbonded kernel f77skernel123nf
357 C Coulomb interaction: Normal Coulomb
358 C VdW interaction: Buckingham
359 C water optimization: TIP4P - other atoms
360 C Calculate forces: no
362 subroutine f77skernel123nf(
363 & nri,
364 & iinr,
365 & jindex,
366 & jjnr,
367 & shift,
368 & shiftvec,
369 & fshift,
370 & gid,
371 & pos,
372 & faction,
373 & charge,
374 & facel,
375 & krf,
376 & crf,
377 & Vc,
378 & type,
379 & ntype,
380 & vdwparam,
381 & Vvdw,
382 & tabscale,
383 & VFtab,
384 & invsqrta,
385 & dvda,
386 & gbtabscale,
387 & GBtab,
388 & nthreads,
389 & count,
390 & mtx,
391 & outeriter,
392 & inneriter,
393 & work)
394 implicit none
395 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
396 real*4 shiftvec(*),fshift(*),pos(*),faction(*)
397 integer*4 gid(*),type(*),ntype
398 real*4 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
399 real*4 Vvdw(*),tabscale,VFtab(*)
400 real*4 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
401 integer*4 nthreads,count,mtx,outeriter,inneriter
402 real*4 work(*)
404 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
405 integer*4 nn0,nn1,nouter,ninner
406 real*4 shX,shY,shZ
407 real*4 rinvsq
408 real*4 jq
409 real*4 qq,vcoul,vctot
410 integer*4 nti
411 integer*4 tj
412 real*4 rinvsix
413 real*4 Vvdw6,Vvdwtot
414 real*4 Vvdwexp,br
415 real*4 ix1,iy1,iz1
416 real*4 ix2,iy2,iz2
417 real*4 ix3,iy3,iz3
418 real*4 ix4,iy4,iz4
419 real*4 jx1,jy1,jz1
420 real*4 dx11,dy11,dz11,rsq11,rinv11
421 real*4 dx21,dy21,dz21,rsq21,rinv21
422 real*4 dx31,dy31,dz31,rsq31,rinv31
423 real*4 dx41,dy41,dz41,rsq41,rinv41
424 real*4 qH,qM
425 real*4 c6,cexp1,cexp2
428 C Initialize water data
429 ii = iinr(1)+1
430 qH = facel*charge(ii+1)
431 qM = facel*charge(ii+3)
432 nti = 3*ntype*type(ii)
435 C Reset outer and inner iteration counters
436 nouter = 0
437 ninner = 0
439 C Loop over thread workunits
440 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
441 if(nn1.gt.nri) nn1=nri
443 C Start outer loop over neighborlists
445 do n=nn0+1,nn1
447 C Load shift vector for this list
448 is3 = 3*shift(n)+1
449 shX = shiftvec(is3)
450 shY = shiftvec(is3+1)
451 shZ = shiftvec(is3+2)
453 C Load limits for loop over neighbors
454 nj0 = jindex(n)+1
455 nj1 = jindex(n+1)
457 C Get outer coordinate index
458 ii = iinr(n)+1
459 ii3 = 3*ii-2
461 C Load i atom data, add shift vector
462 ix1 = shX + pos(ii3+0)
463 iy1 = shY + pos(ii3+1)
464 iz1 = shZ + pos(ii3+2)
465 ix2 = shX + pos(ii3+3)
466 iy2 = shY + pos(ii3+4)
467 iz2 = shZ + pos(ii3+5)
468 ix3 = shX + pos(ii3+6)
469 iy3 = shY + pos(ii3+7)
470 iz3 = shZ + pos(ii3+8)
471 ix4 = shX + pos(ii3+9)
472 iy4 = shY + pos(ii3+10)
473 iz4 = shZ + pos(ii3+11)
475 C Zero the potential energy for this list
476 vctot = 0
477 Vvdwtot = 0
479 C Clear i atom forces
481 do k=nj0,nj1
483 C Get j neighbor index, and coordinate index
484 jnr = jjnr(k)+1
485 j3 = 3*jnr-2
487 C load j atom coordinates
488 jx1 = pos(j3+0)
489 jy1 = pos(j3+1)
490 jz1 = pos(j3+2)
492 C Calculate distance
493 dx11 = ix1 - jx1
494 dy11 = iy1 - jy1
495 dz11 = iz1 - jz1
496 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
497 dx21 = ix2 - jx1
498 dy21 = iy2 - jy1
499 dz21 = iz2 - jz1
500 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
501 dx31 = ix3 - jx1
502 dy31 = iy3 - jy1
503 dz31 = iz3 - jz1
504 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
505 dx41 = ix4 - jx1
506 dy41 = iy4 - jy1
507 dz41 = iz4 - jz1
508 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41
510 C Calculate 1/r and 1/r2
511 rinv11 = 1.0/sqrt(rsq11)
512 rinv21 = 1.0/sqrt(rsq21)
513 rinv31 = 1.0/sqrt(rsq31)
514 rinv41 = 1.0/sqrt(rsq41)
516 C Load parameters for j atom
517 tj = nti+3*type(jnr)+1
518 c6 = vdwparam(tj)
519 cexp1 = vdwparam(tj+1)
520 cexp2 = vdwparam(tj+2)
521 rinvsq = rinv11*rinv11
523 C Buckingham interaction
524 rinvsix = rinvsq*rinvsq*rinvsq
525 Vvdw6 = c6*rinvsix
526 br = cexp2*rsq11*rinv11
527 Vvdwexp = cexp1*exp(-br)
528 Vvdwtot = Vvdwtot+Vvdwexp-Vvdw6
530 C Load parameters for j atom
531 jq = charge(jnr+0)
532 qq = qH*jq
534 C Coulomb interaction
535 vcoul = qq*rinv21
536 vctot = vctot+vcoul
538 C Load parameters for j atom
540 C Coulomb interaction
541 vcoul = qq*rinv31
542 vctot = vctot+vcoul
544 C Load parameters for j atom
545 qq = qM*jq
547 C Coulomb interaction
548 vcoul = qq*rinv41
549 vctot = vctot+vcoul
551 C Inner loop uses 95 flops/iteration
552 end do
555 C Add i forces to mem and shifted force list
557 C Add potential energies to the group for this list
558 ggid = gid(n)+1
559 Vc(ggid) = Vc(ggid) + vctot
560 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
562 C Increment number of inner iterations
563 ninner = ninner + nj1 - nj0
565 C Outer loop uses 14 flops/iteration
566 end do
569 C Increment number of outer iterations
570 nouter = nouter + nn1 - nn0
571 if(nn1.lt.nri) goto 10
573 C Write outer/inner iteration count to pointers
574 outeriter = nouter
575 inneriter = ninner
576 return