Merge "only use CPU_COUNT if it's available" into release-4-6
[gromacs/AngularHB.git] / src / gmxlib / nonbonded / nb_kernel_f77_single / f77skernel113.f
blob4c89d6f72176645e65fa3de34879b1a66244a52f
2 C This source code is part of
4 C G R O M A C S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction. Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
32 C Gromacs nonbonded kernel f77skernel113
33 C Coulomb interaction: Normal Coulomb
34 C VdW interaction: Lennard-Jones
35 C water optimization: TIP4P - other atoms
36 C Calculate forces: yes
38 subroutine f77skernel113(
39 & nri,
40 & iinr,
41 & jindex,
42 & jjnr,
43 & shift,
44 & shiftvec,
45 & fshift,
46 & gid,
47 & pos,
48 & faction,
49 & charge,
50 & facel,
51 & krf,
52 & crf,
53 & Vc,
54 & type,
55 & ntype,
56 & vdwparam,
57 & Vvdw,
58 & tabscale,
59 & VFtab,
60 & invsqrta,
61 & dvda,
62 & gbtabscale,
63 & GBtab,
64 & nthreads,
65 & count,
66 & mtx,
67 & outeriter,
68 & inneriter,
69 & work)
70 implicit none
71 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
72 real*4 shiftvec(*),fshift(*),pos(*),faction(*)
73 integer*4 gid(*),type(*),ntype
74 real*4 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
75 real*4 Vvdw(*),tabscale,VFtab(*)
76 real*4 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
77 integer*4 nthreads,count,mtx,outeriter,inneriter
78 real*4 work(*)
80 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
81 integer*4 nn0,nn1,nouter,ninner
82 real*4 shX,shY,shZ
83 real*4 fscal,tx,ty,tz
84 real*4 rinvsq
85 real*4 jq
86 real*4 qq,vcoul,vctot
87 integer*4 nti
88 integer*4 tj
89 real*4 rinvsix
90 real*4 Vvdw6,Vvdwtot
91 real*4 Vvdw12
92 real*4 ix1,iy1,iz1,fix1,fiy1,fiz1
93 real*4 ix2,iy2,iz2,fix2,fiy2,fiz2
94 real*4 ix3,iy3,iz3,fix3,fiy3,fiz3
95 real*4 ix4,iy4,iz4,fix4,fiy4,fiz4
96 real*4 jx1,jy1,jz1,fjx1,fjy1,fjz1
97 real*4 dx11,dy11,dz11,rsq11
98 real*4 dx21,dy21,dz21,rsq21,rinv21
99 real*4 dx31,dy31,dz31,rsq31,rinv31
100 real*4 dx41,dy41,dz41,rsq41,rinv41
101 real*4 qH,qM
102 real*4 c6,c12
105 C Initialize water data
106 ii = iinr(1)+1
107 qH = facel*charge(ii+1)
108 qM = facel*charge(ii+3)
109 nti = 2*ntype*type(ii)
112 C Reset outer and inner iteration counters
113 nouter = 0
114 ninner = 0
116 C Loop over thread workunits
117 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
118 if(nn1.gt.nri) nn1=nri
120 C Start outer loop over neighborlists
122 do n=nn0+1,nn1
124 C Load shift vector for this list
125 is3 = 3*shift(n)+1
126 shX = shiftvec(is3)
127 shY = shiftvec(is3+1)
128 shZ = shiftvec(is3+2)
130 C Load limits for loop over neighbors
131 nj0 = jindex(n)+1
132 nj1 = jindex(n+1)
134 C Get outer coordinate index
135 ii = iinr(n)+1
136 ii3 = 3*ii-2
138 C Load i atom data, add shift vector
139 ix1 = shX + pos(ii3+0)
140 iy1 = shY + pos(ii3+1)
141 iz1 = shZ + pos(ii3+2)
142 ix2 = shX + pos(ii3+3)
143 iy2 = shY + pos(ii3+4)
144 iz2 = shZ + pos(ii3+5)
145 ix3 = shX + pos(ii3+6)
146 iy3 = shY + pos(ii3+7)
147 iz3 = shZ + pos(ii3+8)
148 ix4 = shX + pos(ii3+9)
149 iy4 = shY + pos(ii3+10)
150 iz4 = shZ + pos(ii3+11)
152 C Zero the potential energy for this list
153 vctot = 0
154 Vvdwtot = 0
156 C Clear i atom forces
157 fix1 = 0
158 fiy1 = 0
159 fiz1 = 0
160 fix2 = 0
161 fiy2 = 0
162 fiz2 = 0
163 fix3 = 0
164 fiy3 = 0
165 fiz3 = 0
166 fix4 = 0
167 fiy4 = 0
168 fiz4 = 0
170 do k=nj0,nj1
172 C Get j neighbor index, and coordinate index
173 jnr = jjnr(k)+1
174 j3 = 3*jnr-2
176 C load j atom coordinates
177 jx1 = pos(j3+0)
178 jy1 = pos(j3+1)
179 jz1 = pos(j3+2)
181 C Calculate distance
182 dx11 = ix1 - jx1
183 dy11 = iy1 - jy1
184 dz11 = iz1 - jz1
185 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
186 dx21 = ix2 - jx1
187 dy21 = iy2 - jy1
188 dz21 = iz2 - jz1
189 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
190 dx31 = ix3 - jx1
191 dy31 = iy3 - jy1
192 dz31 = iz3 - jz1
193 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
194 dx41 = ix4 - jx1
195 dy41 = iy4 - jy1
196 dz41 = iz4 - jz1
197 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41
199 C Calculate 1/r and 1/r2
200 rinvsq = 1.0/rsq11
201 rinv21 = 1.0/sqrt(rsq21)
202 rinv31 = 1.0/sqrt(rsq31)
203 rinv41 = 1.0/sqrt(rsq41)
205 C Load parameters for j atom
206 tj = nti+2*type(jnr)+1
207 c6 = vdwparam(tj)
208 c12 = vdwparam(tj+1)
210 C Lennard-Jones interaction
211 rinvsix = rinvsq*rinvsq*rinvsq
212 Vvdw6 = c6*rinvsix
213 Vvdw12 = c12*rinvsix*rinvsix
214 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6
215 fscal = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq
217 C Calculate temporary vectorial force
218 tx = fscal*dx11
219 ty = fscal*dy11
220 tz = fscal*dz11
222 C Increment i atom force
223 fix1 = fix1 + tx
224 fiy1 = fiy1 + ty
225 fiz1 = fiz1 + tz
227 C Decrement j atom force
228 fjx1 = faction(j3+0) - tx
229 fjy1 = faction(j3+1) - ty
230 fjz1 = faction(j3+2) - tz
232 C Load parameters for j atom
233 jq = charge(jnr+0)
234 qq = qH*jq
235 rinvsq = rinv21*rinv21
237 C Coulomb interaction
238 vcoul = qq*rinv21
239 vctot = vctot+vcoul
240 fscal = (vcoul)*rinvsq
242 C Calculate temporary vectorial force
243 tx = fscal*dx21
244 ty = fscal*dy21
245 tz = fscal*dz21
247 C Increment i atom force
248 fix2 = fix2 + tx
249 fiy2 = fiy2 + ty
250 fiz2 = fiz2 + tz
252 C Decrement j atom force
253 fjx1 = fjx1 - tx
254 fjy1 = fjy1 - ty
255 fjz1 = fjz1 - tz
257 C Load parameters for j atom
258 rinvsq = rinv31*rinv31
260 C Coulomb interaction
261 vcoul = qq*rinv31
262 vctot = vctot+vcoul
263 fscal = (vcoul)*rinvsq
265 C Calculate temporary vectorial force
266 tx = fscal*dx31
267 ty = fscal*dy31
268 tz = fscal*dz31
270 C Increment i atom force
271 fix3 = fix3 + tx
272 fiy3 = fiy3 + ty
273 fiz3 = fiz3 + tz
275 C Decrement j atom force
276 fjx1 = fjx1 - tx
277 fjy1 = fjy1 - ty
278 fjz1 = fjz1 - tz
280 C Load parameters for j atom
281 qq = qM*jq
282 rinvsq = rinv41*rinv41
284 C Coulomb interaction
285 vcoul = qq*rinv41
286 vctot = vctot+vcoul
287 fscal = (vcoul)*rinvsq
289 C Calculate temporary vectorial force
290 tx = fscal*dx41
291 ty = fscal*dy41
292 tz = fscal*dz41
294 C Increment i atom force
295 fix4 = fix4 + tx
296 fiy4 = fiy4 + ty
297 fiz4 = fiz4 + tz
299 C Decrement j atom force
300 faction(j3+0) = fjx1 - tx
301 faction(j3+1) = fjy1 - ty
302 faction(j3+2) = fjz1 - tz
304 C Inner loop uses 113 flops/iteration
305 end do
308 C Add i forces to mem and shifted force list
309 faction(ii3+0) = faction(ii3+0) + fix1
310 faction(ii3+1) = faction(ii3+1) + fiy1
311 faction(ii3+2) = faction(ii3+2) + fiz1
312 faction(ii3+3) = faction(ii3+3) + fix2
313 faction(ii3+4) = faction(ii3+4) + fiy2
314 faction(ii3+5) = faction(ii3+5) + fiz2
315 faction(ii3+6) = faction(ii3+6) + fix3
316 faction(ii3+7) = faction(ii3+7) + fiy3
317 faction(ii3+8) = faction(ii3+8) + fiz3
318 faction(ii3+9) = faction(ii3+9) + fix4
319 faction(ii3+10) = faction(ii3+10) + fiy4
320 faction(ii3+11) = faction(ii3+11) + fiz4
321 fshift(is3) = fshift(is3)+fix1+fix2+fix3+fix4
322 fshift(is3+1) = fshift(is3+1)+fiy1+fiy2+fiy3+fiy4
323 fshift(is3+2) = fshift(is3+2)+fiz1+fiz2+fiz3+fiz4
325 C Add potential energies to the group for this list
326 ggid = gid(n)+1
327 Vc(ggid) = Vc(ggid) + vctot
328 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
330 C Increment number of inner iterations
331 ninner = ninner + nj1 - nj0
333 C Outer loop uses 38 flops/iteration
334 end do
337 C Increment number of outer iterations
338 nouter = nouter + nn1 - nn0
339 if(nn1.lt.nri) goto 10
341 C Write outer/inner iteration count to pointers
342 outeriter = nouter
343 inneriter = ninner
344 return
353 C Gromacs nonbonded kernel f77skernel113nf
354 C Coulomb interaction: Normal Coulomb
355 C VdW interaction: Lennard-Jones
356 C water optimization: TIP4P - other atoms
357 C Calculate forces: no
359 subroutine f77skernel113nf(
360 & nri,
361 & iinr,
362 & jindex,
363 & jjnr,
364 & shift,
365 & shiftvec,
366 & fshift,
367 & gid,
368 & pos,
369 & faction,
370 & charge,
371 & facel,
372 & krf,
373 & crf,
374 & Vc,
375 & type,
376 & ntype,
377 & vdwparam,
378 & Vvdw,
379 & tabscale,
380 & VFtab,
381 & invsqrta,
382 & dvda,
383 & gbtabscale,
384 & GBtab,
385 & nthreads,
386 & count,
387 & mtx,
388 & outeriter,
389 & inneriter,
390 & work)
391 implicit none
392 integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
393 real*4 shiftvec(*),fshift(*),pos(*),faction(*)
394 integer*4 gid(*),type(*),ntype
395 real*4 charge(*),facel,krf,crf,Vc(*),vdwparam(*)
396 real*4 Vvdw(*),tabscale,VFtab(*)
397 real*4 invsqrta(*),dvda(*),gbtabscale,GBtab(*)
398 integer*4 nthreads,count,mtx,outeriter,inneriter
399 real*4 work(*)
401 integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
402 integer*4 nn0,nn1,nouter,ninner
403 real*4 shX,shY,shZ
404 real*4 rinvsq
405 real*4 jq
406 real*4 qq,vcoul,vctot
407 integer*4 nti
408 integer*4 tj
409 real*4 rinvsix
410 real*4 Vvdw6,Vvdwtot
411 real*4 Vvdw12
412 real*4 ix1,iy1,iz1
413 real*4 ix2,iy2,iz2
414 real*4 ix3,iy3,iz3
415 real*4 ix4,iy4,iz4
416 real*4 jx1,jy1,jz1
417 real*4 dx11,dy11,dz11,rsq11
418 real*4 dx21,dy21,dz21,rsq21,rinv21
419 real*4 dx31,dy31,dz31,rsq31,rinv31
420 real*4 dx41,dy41,dz41,rsq41,rinv41
421 real*4 qH,qM
422 real*4 c6,c12
425 C Initialize water data
426 ii = iinr(1)+1
427 qH = facel*charge(ii+1)
428 qM = facel*charge(ii+3)
429 nti = 2*ntype*type(ii)
432 C Reset outer and inner iteration counters
433 nouter = 0
434 ninner = 0
436 C Loop over thread workunits
437 10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
438 if(nn1.gt.nri) nn1=nri
440 C Start outer loop over neighborlists
442 do n=nn0+1,nn1
444 C Load shift vector for this list
445 is3 = 3*shift(n)+1
446 shX = shiftvec(is3)
447 shY = shiftvec(is3+1)
448 shZ = shiftvec(is3+2)
450 C Load limits for loop over neighbors
451 nj0 = jindex(n)+1
452 nj1 = jindex(n+1)
454 C Get outer coordinate index
455 ii = iinr(n)+1
456 ii3 = 3*ii-2
458 C Load i atom data, add shift vector
459 ix1 = shX + pos(ii3+0)
460 iy1 = shY + pos(ii3+1)
461 iz1 = shZ + pos(ii3+2)
462 ix2 = shX + pos(ii3+3)
463 iy2 = shY + pos(ii3+4)
464 iz2 = shZ + pos(ii3+5)
465 ix3 = shX + pos(ii3+6)
466 iy3 = shY + pos(ii3+7)
467 iz3 = shZ + pos(ii3+8)
468 ix4 = shX + pos(ii3+9)
469 iy4 = shY + pos(ii3+10)
470 iz4 = shZ + pos(ii3+11)
472 C Zero the potential energy for this list
473 vctot = 0
474 Vvdwtot = 0
476 C Clear i atom forces
478 do k=nj0,nj1
480 C Get j neighbor index, and coordinate index
481 jnr = jjnr(k)+1
482 j3 = 3*jnr-2
484 C load j atom coordinates
485 jx1 = pos(j3+0)
486 jy1 = pos(j3+1)
487 jz1 = pos(j3+2)
489 C Calculate distance
490 dx11 = ix1 - jx1
491 dy11 = iy1 - jy1
492 dz11 = iz1 - jz1
493 rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
494 dx21 = ix2 - jx1
495 dy21 = iy2 - jy1
496 dz21 = iz2 - jz1
497 rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
498 dx31 = ix3 - jx1
499 dy31 = iy3 - jy1
500 dz31 = iz3 - jz1
501 rsq31 = dx31*dx31+dy31*dy31+dz31*dz31
502 dx41 = ix4 - jx1
503 dy41 = iy4 - jy1
504 dz41 = iz4 - jz1
505 rsq41 = dx41*dx41+dy41*dy41+dz41*dz41
507 C Calculate 1/r and 1/r2
508 rinvsq = 1.0/rsq11
509 rinv21 = 1.0/sqrt(rsq21)
510 rinv31 = 1.0/sqrt(rsq31)
511 rinv41 = 1.0/sqrt(rsq41)
513 C Load parameters for j atom
514 tj = nti+2*type(jnr)+1
515 c6 = vdwparam(tj)
516 c12 = vdwparam(tj+1)
518 C Lennard-Jones interaction
519 rinvsix = rinvsq*rinvsq*rinvsq
520 Vvdw6 = c6*rinvsix
521 Vvdw12 = c12*rinvsix*rinvsix
522 Vvdwtot = Vvdwtot+Vvdw12-Vvdw6
524 C Load parameters for j atom
525 jq = charge(jnr+0)
526 qq = qH*jq
528 C Coulomb interaction
529 vcoul = qq*rinv21
530 vctot = vctot+vcoul
532 C Load parameters for j atom
534 C Coulomb interaction
535 vcoul = qq*rinv31
536 vctot = vctot+vcoul
538 C Load parameters for j atom
539 qq = qM*jq
541 C Coulomb interaction
542 vcoul = qq*rinv41
543 vctot = vctot+vcoul
545 C Inner loop uses 66 flops/iteration
546 end do
549 C Add i forces to mem and shifted force list
551 C Add potential energies to the group for this list
552 ggid = gid(n)+1
553 Vc(ggid) = Vc(ggid) + vctot
554 Vvdw(ggid) = Vvdw(ggid) + Vvdwtot
556 C Increment number of inner iterations
557 ninner = ninner + nj1 - nj0
559 C Outer loop uses 14 flops/iteration
560 end do
563 C Increment number of outer iterations
564 nouter = nouter + nn1 - nn0
565 if(nn1.lt.nri) goto 10
567 C Write outer/inner iteration count to pointers
568 outeriter = nouter
569 inneriter = ninner
570 return