1 subroutine da_trace_report
3 !--------------------------------------------------------------------
4 ! Purpose: Produce a trace report
5 !--------------------------------------------------------------------
9 integer :: i ! loop counter
10 integer :: j ! loop counter
12 integer :: MasterNoRoutines
14 integer :: MinElapsedPos
15 integer :: MaxElapsedPos
18 integer :: itemp1(MaxNoRoutines)
19 integer :: MasterMaxHeap(0:num_procs-1,MaxNoRoutines)
20 integer :: MasterNoCalls(0:num_procs-1,MaxNoRoutines)
21 integer :: OverallNoCalls(MaxNoRoutines)
22 integer, allocatable :: Index(:)
25 real :: TotalElapsedTime !
26 real :: TotalCPUTime(1) !
27 real :: SpeedUp ! speed up factor
28 real :: PercentCPUTime ! percentage in CPU time
29 real :: PercentElapsedTime ! percentage in elapsed time
30 real :: rtemp1(MaxNoRoutines)
31 real :: MasterElapsedTime(0:num_procs-1,MaxNoRoutines)
32 real :: MasterElapsedTimeLocal(0:num_procs-1,MaxNoRoutines)
33 real :: MasterCPUTime(0:num_procs-1,MaxNoRoutines)
34 real :: MasterCPUTimeLocal(0:num_procs-1,MaxNoRoutines)
35 real :: OverallElapsedTime(MaxNoRoutines)
36 real :: OverallCPUTime(MaxNoRoutines)
38 character (len=TraceNameLen) :: MasterTimerNames(MaxNoRoutines)
40 if (.not. use_html) then
41 write (unit=trace_unit, fmt='(A)') &
42 "Report only available if use_html is true"
46 call system_clock (COUNT=temp)
48 TotalElapsedTime=temp-BaseElapsedTime ! on PE 0
50 call cpu_time(TempCpuTime)
52 TotalCPUTime(1) = TempCpuTime - BaseCPUTime
57 ! Ensure the lists from each PE match. use the routine list from the
58 ! traced PE as the master copy
60 MasterTimerNames(:)=TimerNames(:)
62 if (myproc == trace_pe) then
63 MasterNoRoutines=NoRoutines
69 call da_proc_sum_int(MasterNoRoutines)
70 ! only PE 0 knows the result
72 call mpi_bcast(MasterTimerNames(1:MaxNoRoutines), &
73 TraceNameLen*MaxNoRoutines, &
74 MPI_character,trace_pe, comm,ierr)
77 MasterElapsedTime(:,:)=0.0
78 MasterCPUTime(:,:)=0.0
79 MasterElapsedTimeLocal(:,:)=0.0
80 MasterCPUTimeLocal(:,:)=0.0
86 if (TimerNames(j) == MasterTimerNames(i)) then
87 MasterElapsedTime(myproc,i)=ElapsedTime(j)
88 MasterCPUTime(myproc,i)=CPUTime(j)
89 MasterElapsedTimeLocal(myproc,i)=ElapsedTimeLocal(j)
90 MasterCPUTimeLocal(myproc,i)=CPUTimeLocal(j)
91 MasterNoCalls(myproc,i)=NoCalls(j)
92 MasterMaxHeap(myproc,i)=MaxHeap(j)
100 call da_proc_sum_real(MasterElapsedTime(i,:))
101 call da_proc_sum_real(MasterCPUTime(i,:))
102 call da_proc_sum_real(MasterElapsedTimeLocal(i,:))
103 call da_proc_sum_real(MasterCPUTimeLocal(i,:))
104 call da_proc_sum_ints(MasterNoCalls(i,:))
105 call da_proc_sum_ints(MasterMaxHeap(i,:))
111 do j=1,MasterNoRoutines
112 rtemp1(j)=sum(MasterElapsedTimeLocal(:,j))
114 !==========================================================================
115 ! Sort subroutines into time order based on local Elapsed Time.
116 ! All PEs should have the same sort order after the sum.
117 !==========================================================================
119 allocate (Index(MasterNoRoutines))
121 call da_trace_real_sort(rtemp1,MasterNoRoutines,index)
123 do j=1,MasterNoRoutines
124 OverallElapsedTime(j)=sum(MasterElapsedTimeLocal(:,j))
125 OverallCPUTime(j)=sum(MasterCPUTimeLocal(:,j))
126 OverallNoCalls(j)=sum(MasterNoCalls(:,j))
129 write(unit=trace_unit, fmt='(A,I4,A)') &
130 "</pre><hr><H1>For PE",trace_pe,"</H1>"
132 ! Output timing information
134 write(unit=trace_unit, &
135 fmt='("<a name=local><h2>Local Timing Summary</h2></a>")')
137 if (num_procs == 1) then
138 ! needs changing to work MPP
139 write (unit=trace_unit, &
140 fmt='("(Tracing itself took ",F8.1,"s CPU Time, and ",F8.1,a)') &
141 (CPUTimeLocalStart-CPUTimeStart(1)-TotalCPUTime(1)), &
142 (ElapsedTimeLocalStart-ElapsedTimeStart(1)-TotalElapsedTime) / &
144 "s elapsed time during the run. This is not included in the times below.)<p>"
146 write (unit=trace_unit,fmt='(A)') &
147 "No estimate can be made of time in trace itself.<p>"
150 write(unit=trace_unit, &
151 fmt='("<TABLE BORDER>")')
152 write(unit=trace_unit, &
153 fmt='("<TR><TH>Routine Name<TH>Calls<TH COLSPAN=4>Elapsed Time (seconds)<TH COLSPAN=4>")')
154 write(unit=trace_unit, &
155 fmt='("CPU Time (seconds)<TH>Speed up")')
156 write(unit=trace_unit, &
157 fmt='("<TR><TH></TH><TH>per PE</TH><TH>Average per PE<TH>%<TH>Minimum<TH>Maximum <TH>Total<TH>%<TH>Minimum<TH>Maximum")')
158 write(unit=trace_unit, &
159 fmt='("<TH>",I4," PE")') num_procs
161 do i=MasterNoRoutines,1,-1
164 if (TotalCPUTime(1) > 0.0) then
165 PercentCPUTime=100.0 * OverallCPUTime(Pointer)/TotalCPUTime(1)
170 if (TotalElapsedTime > 0.0) then
171 PercentElapsedTime=100.0*OverallElapsedTime(Pointer)/(real(num_procs) * TotalElapsedTime)
173 PercentElapsedTime=100.0
176 if (OverallElapsedTime(Pointer) > 0.0) then
177 SpeedUp = OverallCPUTime(Pointer) / (OverallElapsedTime(Pointer)/real(CountRate*num_procs))
182 ! This horrible solution as MinLOC does not take a DIM argument, so sum
183 ! is needed to convert the array to an integer
185 MinElapsedPos = sum(MinLOC(MasterElapsedTimeLocal(:,Pointer)))-1
186 MaxElapsedPos = sum(MAXLOC(MasterElapsedTimeLocal(:,Pointer)))-1
187 MinCPUPos = sum(MinLOC(MasterCPUTimeLocal(:,Pointer)))-1
188 MaxCPUPos = sum(MAXLOC(MasterCPUTimeLocal(:,Pointer)))-1
190 !----------------------------------------------------------------------
191 ! Write out results. Note the abnormally long format line is needed as
192 ! the NAG compiler complains if a quoted line is split.
193 !----------------------------------------------------------------------
195 write (unit=trace_unit, fmt='(7A)') &
196 "<TR><TD><a href=", &
197 trim(Documentation_url), &
199 trim(MasterTimerNames(Pointer)), & ! Subroutine name
201 trim(MasterTimerNames(Pointer)), & ! Subroutine name
203 write (unit=trace_unit, &
204 fmt='("<TD>",F10.1,2("<TD>",F10.2,"<TD>",F10.1,2("<TD>",F10.1," on",I3)),"<TD>",F5.2)') &
205 real(OverallNoCalls(Pointer))/real(num_procs), & ! Average number of calls per PE
206 OverallElapsedTime(Pointer)/(num_procs*real(CountRate)), & ! Average Elapsed Time
207 PercentElapsedTime, & ! Percent Elapsed Time
208 MasterElapsedTimeLocal(MinElapsedPos,Pointer)/real(CountRate), & ! Min average Elapsed Time
209 MinElapsedPos, & ! Which PE
210 MasterElapsedTimeLocal(MaxElapsedPos,Pointer)/real(CountRate), & ! Max average Elapsed Time
211 MaxElapsedPos, & ! Which PE
212 OverallCPUTime(Pointer), & ! CPU time
213 PercentCPUTime, & ! Percent CPU time
214 MasterCPUTimeLocal(MinCPUPos,Pointer), & ! Min average CPU Time
215 MinCPUPos, & ! Which PE
216 MasterCPUTimeLocal(MaxCPUPos,Pointer), & ! Max average CPU Time
217 MaxCPUPos, & ! Which PE
220 write(unit=trace_csv_unit, &
221 fmt='(2A,F10.1,2(",",F10.2,",",F10.1,2(",",F10.1,",",I3)),",",F5.2)') &
223 '"'//trim(MasterTimerNames(Pointer))//'",', &
224 real(OverallNoCalls(Pointer))/real(num_procs), & ! Average number of calls per PE
225 OverallElapsedTime(Pointer)/(num_procs*real(CountRate)), & ! Average Elapsed Time
226 PercentElapsedTime, & ! Percent Elapsed Time
227 MasterElapsedTimeLocal(MinElapsedPos,Pointer)/real(CountRate), & ! Min average Elapsed Time
228 MinElapsedPos, & ! Which PE
229 MasterElapsedTimeLocal(MaxElapsedPos,Pointer)/real(CountRate), & ! Max average Elapsed Time
230 MaxElapsedPos, & ! Which PE
231 OverallCPUTime(Pointer), & ! CPU time
232 PercentCPUTime, & ! Percent CPU time
233 MasterCPUTimeLocal(MinCPUPos,Pointer), & ! Min average CPU Time
234 MinCPUPos, & ! Which PE
235 MasterCPUTimeLocal(MaxCPUPos,Pointer), & ! Max average CPU Time
236 MaxCPUPos, & ! Which PE
241 write (unit=trace_unit, &
242 fmt='(A,I4,A,F8.1,A,F8.1,A)') &
243 "<TR><TD><B>Total</B>",MasterNoRoutines, "<TD></TD><TD><B>", &
244 TotalElapsedTime/real(CountRate), &
245 "</B><TD></TD><TD></TD><TD></TD><TD><B>", &
246 TotalCPUTime(1),"</B><TD></TD><TD></TD><TD></TD>"
247 if (TotalElapsedTime > 0.0) then
248 write (unit=trace_unit, fmt='("<TD><B>",F8.1,"</B>")') &
249 (TotalCPUTime(1))/(TotalElapsedTime/real(CountRate))
251 write(unit=trace_unit, &
252 fmt='("</TABLE><p><p>")')
255 write(unit=trace_csv_unit,fmt=*) " "
258 !===================================================================================
259 ! Sort subroutines into time order based on overall Elapsed Time.
260 ! All PEs should have the same sort order after the sum.
261 !===================================================================================
263 do j=1,MasterNoRoutines
264 rtemp1(j)=sum(MasterElapsedTime(:,j))
267 call da_trace_real_sort(rtemp1,MasterNoRoutines,index)
269 do j=1,MasterNoRoutines
270 OverallElapsedTime(j)=sum(MasterElapsedTime(:,j))
271 OverallCPUTime(j)=sum(MasterCPUTime(:,j))
274 ! Output timing information
276 write(unit=trace_unit, &
277 fmt='("</pre><hr><a name=overall><h2>Overall Timing Summary</h2></a>")')
279 write(unit=trace_unit, &
280 fmt='("<TABLE BORDER>")')
281 write(unit=trace_unit, &
282 fmt='("<TR><TH>Routine Name<TH>Calls<TH COLSPAN=4>Elapsed Time (seconds)<TH COLSPAN=4>")')
283 write(unit=trace_unit, &
284 fmt='("CPU Time (seconds)<TH>Speed up")')
285 write(unit=trace_unit, &
286 fmt='("<TR><TH></TH><TH>per PE</TH><TH>Average per PE<TH>%<TH>Minimum<TH>Maximum<TH>Total<TH>%<TH>Minimum<TH>Maximum")')
287 write(unit=trace_unit, &
288 fmt='("<TH>",I4," PE")') num_procs
290 do i=MasterNoRoutines,1,-1
293 if (TotalCPUTime(1) > 0.0) then
294 PercentCPUTime=100.0 * OverallCPUTime(Pointer)/TotalCPUTime(1)
299 if (TotalElapsedTime > 0.0) then
300 PercentElapsedTime=100.0 * OverallElapsedTime(Pointer)/(real(num_procs) * TotalElapsedTime)
302 PercentElapsedTime=100.0
305 if (OverallElapsedTime(Pointer) > 0.0) then
306 SpeedUp = OverallCPUTime(Pointer) / (OverallElapsedTime(Pointer)/real(num_procs*CountRate))
311 ! This horrible solution as MinLOC does not take a DIM argument, so
312 ! sum is needed to convert the array to an integer
314 MinElapsedPos = sum(MinLOC(MasterElapsedTime(:,Pointer)))-1
315 MaxElapsedPos = sum(MAXLOC(MasterElapsedTime(:,Pointer)))-1
316 MinCPUPos = sum(MinLOC(MasterCPUTime(:,Pointer)))-1
317 MaxCPUPos = sum(MaxLOC(MasterCPUTime(:,Pointer)))-1
319 !----------------------------------------------------------------------
320 ! Write out results. Note the abnormally long format line is needed as
321 ! the NAG compiler complains if a quoted line is split.
322 !----------------------------------------------------------------------
324 write (unit=trace_unit, fmt='(7A)') &
325 "<TR><TD><a href=", &
326 trim(Documentation_url), &
328 trim(MasterTimerNames(Pointer)), & ! Subroutine name
330 trim(MasterTimerNames(Pointer)), & ! Subroutine name
332 write (unit=trace_unit, &
333 fmt='("<TD>",F10.1,2("<TD>",F10.2,"<TD>",F10.1,2("<TD>",F10.1," on",I3)),"<TD>",F5.2)') &
334 real(OverallNoCalls(Pointer))/real(num_procs), & ! Number of calls per PE
335 OverallElapsedTime(Pointer)/(real(num_procs*CountRate)), & ! Average Elapsed Time
336 PercentElapsedTime, & ! Percent Elapsed Time
337 MasterElapsedTime(MinElapsedPos,Pointer)/real(CountRate), & ! Min average Elapsed Time
338 MinElapsedPos, & ! Which PE
339 MasterElapsedTime(MaxElapsedPos,Pointer)/real(CountRate), & ! Max average Elapsed Time
340 MaxElapsedPos, & ! Which PE
341 OverallCPUTime(Pointer), & ! CPU time
342 PercentCPUTime, & ! Percent CPU time
343 MasterCPUTime(MinCPUPos,Pointer), & ! Min average CPU Time
344 MinCPUPos, & ! Which PE
345 MasterCPUTime(MaxCPUPos,Pointer), & ! Max average CPU Time
346 MaxCPUPos, & ! Which PE
349 write (unit=trace_csv_unit, &
350 fmt='(2A,F10.1,2(",",F10.2,",",F10.1,2(",",F10.1,",",I3)),",",F5.2)') &
352 '"'//trim(MasterTimerNames(Pointer))//'",', &
353 real(OverallNoCalls(Pointer))/real(num_procs), & ! Number of calls per PE
354 OverallElapsedTime(Pointer)/(real(num_procs*CountRate)), & ! Average Elapsed Time
355 PercentElapsedTime, & ! Percent Elapsed Time
356 MasterElapsedTime(MinElapsedPos,Pointer)/real(CountRate), & ! Min average Elapsed Time
357 MinElapsedPos, & ! Which PE
358 MasterElapsedTime(MaxElapsedPos,Pointer)/real(CountRate), & ! Max average Elapsed Time
359 MaxElapsedPos, & ! Which PE
360 OverallCPUTime(Pointer), & ! CPU time
361 PercentCPUTime, & ! Percent CPU time
362 MasterCPUTime(MinCPUPos,Pointer), & ! Min average CPU Time
363 MinCPUPos, & ! Which PE
364 MasterCPUTime(MaxCPUPos,Pointer), & ! Max average CPU Time
365 MaxCPUPos, & ! Which PE
370 write (unit=trace_unit, &
371 fmt='(A,I4,A,F8.1,A,F8.1,A)') &
372 "<TR><TD><B>Total</B>",MasterNoRoutines, "</TD><TD><TD><B>", &
373 TotalElapsedTime/real(CountRate), &
374 "</B><TD></TD><TD></TD><TD></TD><TD><B>",TotalCPUTime(1), &
375 "</B><TD></TD><TD></TD><TD></TD>"
376 if (TotalElapsedTime > 0.0) then
377 write (unit=trace_unit, fmt='("<TD><B>",F8.1,"</B>")') &
378 TotalCPUTime(1)/(TotalElapsedTime/real(CountRate))
381 write(unit=trace_unit, &
385 write(unit=trace_csv_unit,fmt=*) " "
390 !============================================================================
391 ! Sort subroutines into memory use order by max memory on one PE
392 !============================================================================
394 if (trace_memory) then
398 call da_proc_sum_ints(MasterMaxHeap(:,j))
403 do j=1,MasterNoRoutines
404 itemp1(j)=MAXVAL(MasterMaxHeap(:,j))
407 call da_trace_int_sort(itemp1,MasterNoRoutines,index)
409 write (unit=trace_unit,fmt='("<hr><a name=memory><h2>Maximum memory usage for routines</h2></a>")')
410 write (unit=trace_unit,fmt='("<TABLE BORDER>")')
411 write (unit=trace_unit,fmt='("<TR><TH>Routine<TH>Max in any PE (kbytes)")')
412 write (unit=trace_unit,fmt='("<TH>Overall (kbytes)<TH>Average per PE (kbytes)")')
414 do i=MasterNoRoutines,1,-1
416 write (unit=trace_unit, &
417 fmt='("<TR><TD><a href=",A,"/",A,".html>",A,"</a><TD>",I15,"<TD>",I15,"<TD>",I15)') &
418 trim(Documentation_url),trim(TimerNames(Pointer)),trim(TimerNames(Pointer)), &
419 MAXVAL(MasterMaxHeap(:,Pointer)),sum(MasterMaxHeap(:,Pointer)), &
420 sum(MasterMaxHeap(:,Pointer))/num_procs
422 write (unit=trace_csv_unit, &
423 fmt='(2A,I15,",",I15,",",I15)') &
425 '"'//trim(TimerNames(Pointer))//'",', &
426 MAXVAL(MasterMaxHeap(:,Pointer)),sum(MasterMaxHeap(:,Pointer)), &
427 sum(MasterMaxHeap(:,Pointer))/num_procs
430 write (unit=trace_unit,fmt='("</table></body></html>")')
433 write(unit=trace_csv_unit,fmt=*)
438 if (trace_write .AND. trace_unit /= 0) then
442 if (trace_csv .and. rootproc) then
443 close(trace_csv_unit)
446 if (myproc == 0) then
450 end subroutine da_trace_report