Merge "only use CPU_COUNT if it's available" into release-4-6
[gromacs/AngularHB.git] / src / gmxlib / nonbonded / nb_kernel_f77_double / f77dkernel201.f
blob49b18378e3810817ed4ba70fdb4149fc7e5e872a
2 C This source code is part of
4 C G R O M A C S
6 C Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7 C Copyright (c) 2001-2009, The GROMACS Development Team
9 C Gromacs is a library for molecular simulation and trajectory analysis,
10 C written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11 C a full list of developers and information, check out http://www.gromacs.org
13 C This program is free software; you can redistribute it and/or modify it under
14 C the terms of the GNU Lesser General Public License as published by the Free
15 C Software Foundation; either version 2 of the License, or (at your option) any
16 C later version.
17 C As a special exception, you may use this file as part of a free software
18 C library without restriction. Specifically, if other files instantiate
19 C templates or use macros or inline functions from this file, or you compile
20 C this file and link it with other files to produce an executable, this
21 C file does not by itself cause the resulting executable to be covered by
22 C the GNU Lesser General Public License.
24 C In plain-speak: do not worry about classes/macros/templates either - only
25 C changes to the library have to be LGPL, not an application linking with it.
27 C To help fund GROMACS development, we humbly ask that you cite
28 C the papers people have written on it - you can find them on the website!
C
C     Gromacs nonbonded kernel f77dkernel201
C     Coulomb interaction: Reaction field
C     VdW interaction:     Not calculated
C     water optimization:  SPC/TIP3P - other atoms
C     Calculate forces:    yes
C
C     For every neighbor list n the three sites of the i molecule
C     (coordinates pos(ii3) .. pos(ii3+8)) interact with each single
C     j atom of the list through
C
C         V     = qq*(1/r + krf*r**2 - crf)
C         fscal = qq*(1/r - 2*krf*r**2)/r**2
C
C     where qq = facel*charge(i-site)*charge(j).
C
C     Arguments read:    nri, iinr, jindex, jjnr, shift, shiftvec,
C                        gid, pos, charge, facel, krf, crf
C     Passed through to f77kernelsync: mtx, count, nri, nthreads
C     Arguments updated: faction, fshift (forces), Vc (energies)
C     Arguments written: outeriter, inneriter (iteration counts)
C     Arguments unused:  type, ntype, vdwparam, Vvdw, tabscale,
C                        VFtab, invsqrta, dvda, gbtabscale, GBtab,
C                        work
C
C     NOTE: iinr, jindex, jjnr, shift and gid are treated as holding
C     zero-based values; each use below converts to one-based
C     Fortran indexing.
C
      subroutine f77dkernel201(
     &                          nri,
     &                          iinr,
     &                          jindex,
     &                          jjnr,
     &                          shift,
     &                          shiftvec,
     &                          fshift,
     &                          gid,
     &                          pos,
     &                          faction,
     &                          charge,
     &                          facel,
     &                          krf,
     &                          crf,
     &                          Vc,
     &                          type,
     &                          ntype,
     &                          vdwparam,
     &                          Vvdw,
     &                          tabscale,
     &                          VFtab,
     &                          invsqrta,
     &                          dvda,
     &                          gbtabscale,
     &                          GBtab,
     &                          nthreads,
     &                          count,
     &                          mtx,
     &                          outeriter,
     &                          inneriter,
     &                          work)
      implicit none
      integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
      real*8    shiftvec(*),fshift(*),pos(*),faction(*)
      integer*4 gid(*),type(*),ntype
      real*8    charge(*),facel,krf,crf,Vc(*),vdwparam(*)
      real*8    Vvdw(*),tabscale,VFtab(*)
      real*8    invsqrta(*),dvda(*),gbtabscale,GBtab(*)
      integer*4 nthreads,count,mtx,outeriter,inneriter
      real*8    work(*)

      integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
      integer*4 nn0,nn1,nouter,ninner
      real*8    shX,shY,shZ
      real*8    fscal,tx,ty,tz
      real*8    rinvsq
      real*8    jq
      real*8    qq,vcoul,vctot
      real*8    krsq
      real*8    ix1,iy1,iz1,fix1,fiy1,fiz1
      real*8    ix2,iy2,iz2,fix2,fiy2,fiz2
      real*8    ix3,iy3,iz3,fix3,fiy3,fiz3
      real*8    jx1,jy1,jz1,fjx1,fjy1,fjz1
      real*8    dx11,dy11,dz11,rsq11,rinv11
      real*8    dx21,dy21,dz21,rsq21,rinv21
      real*8    dx31,dy31,dz31,rsq31,rinv31
      real*8    qO,qH

C     Initialize water data.  The two i-site charges are read once
C     from the first i entry, scaled by facel, and reused for every
C     neighbor list: site 1 uses qO, sites 2 and 3 use qH.
      ii = iinr(1)+1
      qO = facel*charge(ii)
      qH = facel*charge(ii+1)

C     Reset outer and inner iteration counters
      nouter = 0
      ninner = 0

C     Loop over thread workunits.  f77kernelsync is external to this
C     file; it returns the list range (nn0,nn1] to process next
C     (presumably one chunk per call - confirm against its source).
   10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
      if(nn1.gt.nri) nn1=nri

C     Start outer loop over neighborlists
      do n=nn0+1,nn1

C        Load shift vector for this list
         is3 = 3*shift(n)+1
         shX = shiftvec(is3)
         shY = shiftvec(is3+1)
         shZ = shiftvec(is3+2)

C        Load limits for loop over neighbors (inclusive range)
         nj0 = jindex(n)+1
         nj1 = jindex(n+1)

C        Get outer coordinate index
         ii  = iinr(n)+1
         ii3 = 3*ii-2

C        Load i atom data, add shift vector
         ix1 = shX + pos(ii3+0)
         iy1 = shY + pos(ii3+1)
         iz1 = shZ + pos(ii3+2)
         ix2 = shX + pos(ii3+3)
         iy2 = shY + pos(ii3+4)
         iz2 = shZ + pos(ii3+5)
         ix3 = shX + pos(ii3+6)
         iy3 = shY + pos(ii3+7)
         iz3 = shZ + pos(ii3+8)

C        Zero the potential energy for this list
         vctot = 0.0d0

C        Clear i atom forces
         fix1 = 0.0d0
         fiy1 = 0.0d0
         fiz1 = 0.0d0
         fix2 = 0.0d0
         fiy2 = 0.0d0
         fiz2 = 0.0d0
         fix3 = 0.0d0
         fiy3 = 0.0d0
         fiz3 = 0.0d0

         do k=nj0,nj1

C           Get j neighbor index, and coordinate index
            jnr = jjnr(k)+1
            j3  = 3*jnr-2

C           Load j atom coordinates
            jx1 = pos(j3+0)
            jy1 = pos(j3+1)
            jz1 = pos(j3+2)

C           Calculate distance vectors and squared distances
            dx11  = ix1 - jx1
            dy11  = iy1 - jy1
            dz11  = iz1 - jz1
            rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
            dx21  = ix2 - jx1
            dy21  = iy2 - jy1
            dz21  = iz2 - jz1
            rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
            dx31  = ix3 - jx1
            dy31  = iy3 - jy1
            dz31  = iz3 - jz1
            rsq31 = dx31*dx31+dy31*dy31+dz31*dz31

C           Calculate 1/r
            rinv11 = 1.0d0/sqrt(rsq11)
            rinv21 = 1.0d0/sqrt(rsq21)
            rinv31 = 1.0d0/sqrt(rsq31)

C           --- i site 1 (charge qO) with the j atom ---
            jq     = charge(jnr+0)
            qq     = qO*jq
            rinvsq = rinv11*rinv11

C           Coulomb reaction-field interaction
            krsq   = krf*rsq11
            vcoul  = qq*(rinv11+krsq-crf)
            vctot  = vctot+vcoul
            fscal  = (qq*(rinv11-2.0d0*krsq))*rinvsq

C           Calculate temporary vectorial force
            tx = fscal*dx11
            ty = fscal*dy11
            tz = fscal*dz11

C           Increment i atom force
            fix1 = fix1 + tx
            fiy1 = fiy1 + ty
            fiz1 = fiz1 + tz

C           Decrement j atom force.  The running j force is kept in
C           fjx1/fjy1/fjz1 and stored only after the third site.
            fjx1 = faction(j3+0) - tx
            fjy1 = faction(j3+1) - ty
            fjz1 = faction(j3+2) - tz

C           --- i site 2 (charge qH) with the j atom ---
            qq     = qH*jq
            rinvsq = rinv21*rinv21

C           Coulomb reaction-field interaction
            krsq   = krf*rsq21
            vcoul  = qq*(rinv21+krsq-crf)
            vctot  = vctot+vcoul
            fscal  = (qq*(rinv21-2.0d0*krsq))*rinvsq

C           Calculate temporary vectorial force
            tx = fscal*dx21
            ty = fscal*dy21
            tz = fscal*dz21

C           Increment i atom force
            fix2 = fix2 + tx
            fiy2 = fiy2 + ty
            fiz2 = fiz2 + tz

C           Decrement j atom force
            fjx1 = fjx1 - tx
            fjy1 = fjy1 - ty
            fjz1 = fjz1 - tz

C           --- i site 3 with the j atom; qq = qH*jq is reused ---
            rinvsq = rinv31*rinv31

C           Coulomb reaction-field interaction
            krsq   = krf*rsq31
            vcoul  = qq*(rinv31+krsq-crf)
            vctot  = vctot+vcoul
            fscal  = (qq*(rinv31-2.0d0*krsq))*rinvsq

C           Calculate temporary vectorial force
            tx = fscal*dx31
            ty = fscal*dy31
            tz = fscal*dz31

C           Increment i atom force
            fix3 = fix3 + tx
            fiy3 = fiy3 + ty
            fiz3 = fiz3 + tz

C           Decrement j atom force and write it back to memory
            faction(j3+0) = fjx1 - tx
            faction(j3+1) = fjy1 - ty
            faction(j3+2) = fjz1 - tz

C           Inner loop uses 113 flops/iteration
         end do

C        Add i forces to mem and shifted force list
         faction(ii3+0) = faction(ii3+0) + fix1
         faction(ii3+1) = faction(ii3+1) + fiy1
         faction(ii3+2) = faction(ii3+2) + fiz1
         faction(ii3+3) = faction(ii3+3) + fix2
         faction(ii3+4) = faction(ii3+4) + fiy2
         faction(ii3+5) = faction(ii3+5) + fiz2
         faction(ii3+6) = faction(ii3+6) + fix3
         faction(ii3+7) = faction(ii3+7) + fiy3
         faction(ii3+8) = faction(ii3+8) + fiz3
         fshift(is3)    = fshift(is3)+fix1+fix2+fix3
         fshift(is3+1)  = fshift(is3+1)+fiy1+fiy2+fiy3
         fshift(is3+2)  = fshift(is3+2)+fiz1+fiz2+fiz3

C        Add potential energies to the group for this list
         ggid     = gid(n)+1
         Vc(ggid) = Vc(ggid) + vctot

C        Increment number of inner iterations.  The k loop runs over
C        the inclusive range nj0..nj1, i.e. nj1-nj0+1 times (zero
C        for an empty list, where nj1 = nj0-1).
         ninner = ninner + nj1 - nj0 + 1

C        Outer loop uses 28 flops/iteration
      end do

C     Increment number of outer iterations; fetch another workunit
C     while lists remain.
      nouter = nouter + nn1 - nn0
      if(nn1.lt.nri) goto 10

C     Write outer/inner iteration count to pointers
      outeriter = nouter
      inneriter = ninner
      return
      end
C
C     Gromacs nonbonded kernel f77dkernel201nf
C     Coulomb interaction: Reaction field
C     VdW interaction:     Not calculated
C     water optimization:  SPC/TIP3P - other atoms
C     Calculate forces:    no
C
C     Energy-only variant: for every neighbor list n the three sites
C     of the i molecule (coordinates pos(ii3) .. pos(ii3+8))
C     interact with each single j atom of the list through
C
C         V = qq*(1/r + krf*r**2 - crf)
C
C     where qq = facel*charge(i-site)*charge(j).  No forces are
C     computed; faction and fshift are not touched.
C
C     Arguments read:    nri, iinr, jindex, jjnr, shift, shiftvec,
C                        gid, pos, charge, facel, krf, crf
C     Passed through to f77kernelsync: mtx, count, nri, nthreads
C     Arguments updated: Vc (energies)
C     Arguments written: outeriter, inneriter (iteration counts)
C     Arguments unused:  fshift, faction, type, ntype, vdwparam,
C                        Vvdw, tabscale, VFtab, invsqrta, dvda,
C                        gbtabscale, GBtab, work
C
C     NOTE: iinr, jindex, jjnr, shift and gid are treated as holding
C     zero-based values; each use below converts to one-based
C     Fortran indexing.
C
      subroutine f77dkernel201nf(
     &                          nri,
     &                          iinr,
     &                          jindex,
     &                          jjnr,
     &                          shift,
     &                          shiftvec,
     &                          fshift,
     &                          gid,
     &                          pos,
     &                          faction,
     &                          charge,
     &                          facel,
     &                          krf,
     &                          crf,
     &                          Vc,
     &                          type,
     &                          ntype,
     &                          vdwparam,
     &                          Vvdw,
     &                          tabscale,
     &                          VFtab,
     &                          invsqrta,
     &                          dvda,
     &                          gbtabscale,
     &                          GBtab,
     &                          nthreads,
     &                          count,
     &                          mtx,
     &                          outeriter,
     &                          inneriter,
     &                          work)
      implicit none
      integer*4 nri,iinr(*),jindex(*),jjnr(*),shift(*)
      real*8    shiftvec(*),fshift(*),pos(*),faction(*)
      integer*4 gid(*),type(*),ntype
      real*8    charge(*),facel,krf,crf,Vc(*),vdwparam(*)
      real*8    Vvdw(*),tabscale,VFtab(*)
      real*8    invsqrta(*),dvda(*),gbtabscale,GBtab(*)
      integer*4 nthreads,count,mtx,outeriter,inneriter
      real*8    work(*)

      integer*4 n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid
      integer*4 nn0,nn1,nouter,ninner
      real*8    shX,shY,shZ
      real*8    jq
      real*8    qq,vcoul,vctot
      real*8    krsq
      real*8    ix1,iy1,iz1
      real*8    ix2,iy2,iz2
      real*8    ix3,iy3,iz3
      real*8    jx1,jy1,jz1
      real*8    dx11,dy11,dz11,rsq11,rinv11
      real*8    dx21,dy21,dz21,rsq21,rinv21
      real*8    dx31,dy31,dz31,rsq31,rinv31
      real*8    qO,qH

C     Initialize water data.  The two i-site charges are read once
C     from the first i entry, scaled by facel, and reused for every
C     neighbor list: site 1 uses qO, sites 2 and 3 use qH.
      ii = iinr(1)+1
      qO = facel*charge(ii)
      qH = facel*charge(ii+1)

C     Reset outer and inner iteration counters
      nouter = 0
      ninner = 0

C     Loop over thread workunits.  f77kernelsync is external to this
C     file; it returns the list range (nn0,nn1] to process next
C     (presumably one chunk per call - confirm against its source).
   10 call f77kernelsync(mtx,count,nri,nthreads,nn0,nn1)
      if(nn1.gt.nri) nn1=nri

C     Start outer loop over neighborlists
      do n=nn0+1,nn1

C        Load shift vector for this list
         is3 = 3*shift(n)+1
         shX = shiftvec(is3)
         shY = shiftvec(is3+1)
         shZ = shiftvec(is3+2)

C        Load limits for loop over neighbors (inclusive range)
         nj0 = jindex(n)+1
         nj1 = jindex(n+1)

C        Get outer coordinate index
         ii  = iinr(n)+1
         ii3 = 3*ii-2

C        Load i atom data, add shift vector
         ix1 = shX + pos(ii3+0)
         iy1 = shY + pos(ii3+1)
         iz1 = shZ + pos(ii3+2)
         ix2 = shX + pos(ii3+3)
         iy2 = shY + pos(ii3+4)
         iz2 = shZ + pos(ii3+5)
         ix3 = shX + pos(ii3+6)
         iy3 = shY + pos(ii3+7)
         iz3 = shZ + pos(ii3+8)

C        Zero the potential energy for this list
         vctot = 0.0d0

         do k=nj0,nj1

C           Get j neighbor index, and coordinate index
            jnr = jjnr(k)+1
            j3  = 3*jnr-2

C           Load j atom coordinates
            jx1 = pos(j3+0)
            jy1 = pos(j3+1)
            jz1 = pos(j3+2)

C           Calculate distance vectors and squared distances
            dx11  = ix1 - jx1
            dy11  = iy1 - jy1
            dz11  = iz1 - jz1
            rsq11 = dx11*dx11+dy11*dy11+dz11*dz11
            dx21  = ix2 - jx1
            dy21  = iy2 - jy1
            dz21  = iz2 - jz1
            rsq21 = dx21*dx21+dy21*dy21+dz21*dz21
            dx31  = ix3 - jx1
            dy31  = iy3 - jy1
            dz31  = iz3 - jz1
            rsq31 = dx31*dx31+dy31*dy31+dz31*dz31

C           Calculate 1/r
            rinv11 = 1.0d0/sqrt(rsq11)
            rinv21 = 1.0d0/sqrt(rsq21)
            rinv31 = 1.0d0/sqrt(rsq31)

C           --- i site 1 (charge qO) with the j atom ---
            jq    = charge(jnr+0)
            qq    = qO*jq

C           Coulomb reaction-field interaction
            krsq  = krf*rsq11
            vcoul = qq*(rinv11+krsq-crf)
            vctot = vctot+vcoul

C           --- i site 2 (charge qH) with the j atom ---
            qq    = qH*jq

C           Coulomb reaction-field interaction
            krsq  = krf*rsq21
            vcoul = qq*(rinv21+krsq-crf)
            vctot = vctot+vcoul

C           --- i site 3 with the j atom; qq = qH*jq is reused ---

C           Coulomb reaction-field interaction
            krsq  = krf*rsq31
            vcoul = qq*(rinv31+krsq-crf)
            vctot = vctot+vcoul

C           Inner loop uses 71 flops/iteration
         end do

C        Add potential energies to the group for this list
         ggid     = gid(n)+1
         Vc(ggid) = Vc(ggid) + vctot

C        Increment number of inner iterations.  The k loop runs over
C        the inclusive range nj0..nj1, i.e. nj1-nj0+1 times (zero
C        for an empty list, where nj1 = nj0-1).
         ninner = ninner + nj1 - nj0 + 1

C        Outer loop uses 10 flops/iteration
      end do

C     Increment number of outer iterations; fetch another workunit
C     while lists remain.
      nouter = nouter + nn1 - nn0
      if(nn1.lt.nri) goto 10

C     Write outer/inner iteration count to pointers
      outeriter = nouter
      inneriter = ninner
      return
      end