Merge remote-tracking branch 'origin/release-v4.6.1'
[WRF.git] / var / da / da_tracing / da_trace_report.inc
blob2090f4f0e270c1bbeec55ecbbae7d21ca30e7c5f
1 subroutine da_trace_report
3    !--------------------------------------------------------------------
4    ! Purpose: Produce a trace report
5    !--------------------------------------------------------------------
7    implicit none
9    integer :: i                        ! loop counter
10    integer :: j                        ! loop counter
11    integer :: CountRate
12    integer :: MasterNoRoutines
13    integer :: temp
14    integer :: MinElapsedPos
15    integer :: MaxElapsedPos
16    integer :: MinCPUPos
17    integer :: MaxCPUPos
18    integer :: itemp1(MaxNoRoutines)
19    integer :: MasterMaxHeap(0:num_procs-1,MaxNoRoutines)
20    integer :: MasterNoCalls(0:num_procs-1,MaxNoRoutines)
21    integer :: OverallNoCalls(MaxNoRoutines)
22    integer, allocatable :: Index(:)
24    real    :: TempCpuTime
25    real    :: TotalElapsedTime             !
26    real    :: TotalCPUTime(1)              !
27    real    :: SpeedUp                      ! speed up factor
28    real    :: PercentCPUTime               ! percentage in CPU time
29    real    :: PercentElapsedTime           ! percentage in elapsed time
30    real    :: rtemp1(MaxNoRoutines)
31    real    :: MasterElapsedTime(0:num_procs-1,MaxNoRoutines)
32    real    :: MasterElapsedTimeLocal(0:num_procs-1,MaxNoRoutines)
33    real    :: MasterCPUTime(0:num_procs-1,MaxNoRoutines)
34    real    :: MasterCPUTimeLocal(0:num_procs-1,MaxNoRoutines)
35    real    :: OverallElapsedTime(MaxNoRoutines)
36    real    :: OverallCPUTime(MaxNoRoutines)
38    character (len=TraceNameLen) :: MasterTimerNames(MaxNoRoutines)
40    if (.not. use_html) then
41       write (unit=trace_unit, fmt='(A)') &
42          "Report only available if use_html is true"
43       return
44    end if
46    call system_clock (COUNT=temp)
48    TotalElapsedTime=temp-BaseElapsedTime ! on PE 0
50    call cpu_time(TempCpuTime)
52    TotalCPUTime(1) = TempCpuTime - BaseCPUTime
54    call system_clock(&
55       COUNT_RATE=CountRate)
57    ! Ensure the lists from each PE match. use the routine list from the 
58    ! traced PE as the master copy
60    MasterTimerNames(:)=TimerNames(:)
62    if (myproc == trace_pe) then
63       MasterNoRoutines=NoRoutines
64    else
65       MasterNoRoutines=0
66    end if
68 #ifdef DM_PARALLEL
69    call da_proc_sum_int(MasterNoRoutines)
70    ! only PE 0 knows the result
72    call mpi_bcast(MasterTimerNames(1:MaxNoRoutines), &
73                   TraceNameLen*MaxNoRoutines, &
74                   MPI_character,trace_pe, comm,ierr)
75 #endif
77    MasterElapsedTime(:,:)=0.0
78    MasterCPUTime(:,:)=0.0
79    MasterElapsedTimeLocal(:,:)=0.0
80    MasterCPUTimeLocal(:,:)=0.0
81    MasterNoCalls(:,:)=0
82    MasterMaxHeap(:,:)=0
84    do i=1,MaxNoRoutines
85       do j=1,NoRoutines
86          if (TimerNames(j) == MasterTimerNames(i)) then
87             MasterElapsedTime(myproc,i)=ElapsedTime(j)
88             MasterCPUTime(myproc,i)=CPUTime(j)
89             MasterElapsedTimeLocal(myproc,i)=ElapsedTimeLocal(j)
90             MasterCPUTimeLocal(myproc,i)=CPUTimeLocal(j)
91             MasterNoCalls(myproc,i)=NoCalls(j)
92             MasterMaxHeap(myproc,i)=MaxHeap(j)
93             cycle
94          end if
95       end do
96    end do
98 #ifdef DM_PARALLEL
99    do i=0,num_procs-1
100       call da_proc_sum_real(MasterElapsedTime(i,:))
101       call da_proc_sum_real(MasterCPUTime(i,:))
102       call da_proc_sum_real(MasterElapsedTimeLocal(i,:))
103       call da_proc_sum_real(MasterCPUTimeLocal(i,:))
104       call da_proc_sum_ints(MasterNoCalls(i,:))
105       call da_proc_sum_ints(MasterMaxHeap(i,:))
106    end do
107 #endif
109    if (rootproc) then
111       do j=1,MasterNoRoutines
112          rtemp1(j)=sum(MasterElapsedTimeLocal(:,j))
113       end do
114       !==========================================================================
115       ! Sort subroutines into time order based on local Elapsed Time.
116       ! All PEs should have the same sort order after the sum.
117       !==========================================================================
119       allocate (Index(MasterNoRoutines))
121       call da_trace_real_sort(rtemp1,MasterNoRoutines,index)
123       do j=1,MasterNoRoutines
124          OverallElapsedTime(j)=sum(MasterElapsedTimeLocal(:,j))
125          OverallCPUTime(j)=sum(MasterCPUTimeLocal(:,j))
126          OverallNoCalls(j)=sum(MasterNoCalls(:,j))
127       end do
129       write(unit=trace_unit, fmt='(A,I4,A)') &
130          "</pre><hr><H1>For PE",trace_pe,"</H1>"
132       ! Output timing information
134       write(unit=trace_unit, &
135          fmt='("<a name=local><h2>Local Timing Summary</h2></a>")')
137       if (num_procs == 1) then
138          ! needs changing to work MPP
139          write (unit=trace_unit, &
140             fmt='("(Tracing itself took ",F8.1,"s CPU Time, and ",F8.1,a)') &
141             (CPUTimeLocalStart-CPUTimeStart(1)-TotalCPUTime(1)), &
142             (ElapsedTimeLocalStart-ElapsedTimeStart(1)-TotalElapsedTime) / &
143             real(CountRate), &
144             "s elapsed time during the run. This is not included in the times below.)<p>"
145       else
146          write (unit=trace_unit,fmt='(A)') &
147             "No estimate can be made of time in trace itself.<p>"
148       end if
150       write(unit=trace_unit, &
151          fmt='("<TABLE BORDER>")')
152       write(unit=trace_unit, &
153          fmt='("<TR><TH>Routine Name<TH>Calls<TH COLSPAN=4>Elapsed Time (seconds)<TH COLSPAN=4>")')
154       write(unit=trace_unit, &
155          fmt='("CPU Time (seconds)<TH>Speed up")')
156       write(unit=trace_unit, &
157          fmt='("<TR><TH></TH><TH>per PE</TH><TH>Average per PE<TH>%<TH>Minimum<TH>Maximum <TH>Total<TH>%<TH>Minimum<TH>Maximum")')
158       write(unit=trace_unit, &
159          fmt='("<TH>",I4," PE")') num_procs
161       do i=MasterNoRoutines,1,-1
162          Pointer=index(i)    
164          if (TotalCPUTime(1) > 0.0) then
165             PercentCPUTime=100.0 * OverallCPUTime(Pointer)/TotalCPUTime(1)
166          else
167            PercentCPUTime=100.0
168          end if
170          if (TotalElapsedTime > 0.0) then
171             PercentElapsedTime=100.0*OverallElapsedTime(Pointer)/(real(num_procs) * TotalElapsedTime)
172          else
173             PercentElapsedTime=100.0
174          end if
176          if (OverallElapsedTime(Pointer) > 0.0) then
177             SpeedUp = OverallCPUTime(Pointer) / (OverallElapsedTime(Pointer)/real(CountRate*num_procs))
178          else
179             SpeedUp = 0.0
180          end if
182          ! This horrible solution as MinLOC does not take a DIM argument, so sum
183          ! is needed to convert the array to an integer
185          MinElapsedPos = sum(MinLOC(MasterElapsedTimeLocal(:,Pointer)))-1
186          MaxElapsedPos = sum(MAXLOC(MasterElapsedTimeLocal(:,Pointer)))-1
187          MinCPUPos     = sum(MinLOC(MasterCPUTimeLocal(:,Pointer)))-1
188          MaxCPUPos     = sum(MAXLOC(MasterCPUTimeLocal(:,Pointer)))-1
190          !----------------------------------------------------------------------
191          ! Write out results. Note the abnormally long format line is needed as
192          ! the NAG compiler complains if a quoted line is split.
193          !----------------------------------------------------------------------
195          write (unit=trace_unit, fmt='(7A)') &
196             "<TR><TD><a href=", &
197             trim(Documentation_url), &
198             "/", &
199             trim(MasterTimerNames(Pointer)), & ! Subroutine name
200             ".html>", &
201             trim(MasterTimerNames(Pointer)), & ! Subroutine name
202             "</a>"
203          write (unit=trace_unit, &
204             fmt='("<TD>",F10.1,2("<TD>",F10.2,"<TD>",F10.1,2("<TD>",F10.1," on",I3)),"<TD>",F5.2)') &
205             real(OverallNoCalls(Pointer))/real(num_procs),                       & ! Average number of calls per PE
206             OverallElapsedTime(Pointer)/(num_procs*real(CountRate)),             & ! Average Elapsed Time
207             PercentElapsedTime,                                              & ! Percent Elapsed Time
208             MasterElapsedTimeLocal(MinElapsedPos,Pointer)/real(CountRate),   & ! Min average Elapsed Time
209             MinElapsedPos,                                                   & ! Which PE
210             MasterElapsedTimeLocal(MaxElapsedPos,Pointer)/real(CountRate),   & ! Max average Elapsed Time
211             MaxElapsedPos,                                                   & ! Which PE
212             OverallCPUTime(Pointer),                                         & ! CPU time
213             PercentCPUTime,                                                  & ! Percent CPU time
214             MasterCPUTimeLocal(MinCPUPos,Pointer),                           & ! Min average CPU Time
215             MinCPUPos,                                                       & ! Which PE
216             MasterCPUTimeLocal(MaxCPUPos,Pointer),                           & ! Max average CPU Time
217             MaxCPUPos,                                                       & ! Which PE
218             SpeedUp                                                            ! Speedup
219          if (trace_csv) then
220             write(unit=trace_csv_unit,  &
221                fmt='(2A,F10.1,2(",",F10.2,",",F10.1,2(",",F10.1,",",I3)),",",F5.2)') &
222                '"local",', &
223                '"'//trim(MasterTimerNames(Pointer))//'",', &
224                real(OverallNoCalls(Pointer))/real(num_procs),                       & ! Average number of calls per PE
225                OverallElapsedTime(Pointer)/(num_procs*real(CountRate)),             & ! Average Elapsed Time
226                PercentElapsedTime,                                              & ! Percent Elapsed Time
227                MasterElapsedTimeLocal(MinElapsedPos,Pointer)/real(CountRate),   & ! Min average Elapsed Time
228                MinElapsedPos,                                                   & ! Which PE
229                MasterElapsedTimeLocal(MaxElapsedPos,Pointer)/real(CountRate),   & ! Max average Elapsed Time
230                MaxElapsedPos,                                                   & ! Which PE
231                OverallCPUTime(Pointer),                                         & ! CPU time
232                PercentCPUTime,                                                  & ! Percent CPU time
233                MasterCPUTimeLocal(MinCPUPos,Pointer),                           & ! Min average CPU Time
234                MinCPUPos,                                                       & ! Which PE
235                MasterCPUTimeLocal(MaxCPUPos,Pointer),                           & ! Max average CPU Time
236                MaxCPUPos,                                                       & ! Which PE
237                SpeedUp                                                            ! Speedup
238          end if
239       end do
241       write (unit=trace_unit, &
242          fmt='(A,I4,A,F8.1,A,F8.1,A)') &
243          "<TR><TD><B>Total</B>",MasterNoRoutines, "<TD></TD><TD><B>", &
244          TotalElapsedTime/real(CountRate), &
245          "</B><TD></TD><TD></TD><TD></TD><TD><B>", &
246          TotalCPUTime(1),"</B><TD></TD><TD></TD><TD></TD>"
247       if (TotalElapsedTime > 0.0) then
248          write (unit=trace_unit, fmt='("<TD><B>",F8.1,"</B>")') &
249             (TotalCPUTime(1))/(TotalElapsedTime/real(CountRate))
250       end if
251       write(unit=trace_unit, &
252          fmt='("</TABLE><p><p>")')
254       if (trace_csv) then
255          write(unit=trace_csv_unit,fmt=*) " "
256       end if
258       !===================================================================================
259       ! Sort subroutines into time order based on overall Elapsed Time.
260       ! All PEs should have the same sort order after the sum. 
261       !===================================================================================
263       do j=1,MasterNoRoutines
264          rtemp1(j)=sum(MasterElapsedTime(:,j))
265       end do
267       call da_trace_real_sort(rtemp1,MasterNoRoutines,index)
269       do j=1,MasterNoRoutines
270          OverallElapsedTime(j)=sum(MasterElapsedTime(:,j))
271          OverallCPUTime(j)=sum(MasterCPUTime(:,j))
272       end do
274       ! Output timing information
276       write(unit=trace_unit, &
277          fmt='("</pre><hr><a name=overall><h2>Overall Timing Summary</h2></a>")')
279       write(unit=trace_unit, &
280          fmt='("<TABLE BORDER>")')
281       write(unit=trace_unit, &
282          fmt='("<TR><TH>Routine Name<TH>Calls<TH COLSPAN=4>Elapsed Time (seconds)<TH COLSPAN=4>")')
283       write(unit=trace_unit, &
284          fmt='("CPU Time (seconds)<TH>Speed up")')
285       write(unit=trace_unit, &
286          fmt='("<TR><TH></TH><TH>per PE</TH><TH>Average per PE<TH>%<TH>Minimum<TH>Maximum<TH>Total<TH>%<TH>Minimum<TH>Maximum")')
287       write(unit=trace_unit, &
288          fmt='("<TH>",I4," PE")') num_procs
290       do i=MasterNoRoutines,1,-1
291          Pointer=index(i)    
293          if (TotalCPUTime(1) > 0.0) then
294             PercentCPUTime=100.0 * OverallCPUTime(Pointer)/TotalCPUTime(1)
295          else
296             PercentCPUTime=100.0
297          end if
299          if (TotalElapsedTime > 0.0) then
300             PercentElapsedTime=100.0 * OverallElapsedTime(Pointer)/(real(num_procs) * TotalElapsedTime)
301          else
302             PercentElapsedTime=100.0
303          end if
305          if (OverallElapsedTime(Pointer) > 0.0) then
306             SpeedUp = OverallCPUTime(Pointer) / (OverallElapsedTime(Pointer)/real(num_procs*CountRate))
307          else
308             SpeedUp = 0.0
309          end if
311          ! This horrible solution as MinLOC does not take a DIM argument, so 
312          ! sum is needed to convert the array to an integer
314          MinElapsedPos = sum(MinLOC(MasterElapsedTime(:,Pointer)))-1
315          MaxElapsedPos = sum(MAXLOC(MasterElapsedTime(:,Pointer)))-1
316          MinCPUPos     = sum(MinLOC(MasterCPUTime(:,Pointer)))-1
317          MaxCPUPos     = sum(MaxLOC(MasterCPUTime(:,Pointer)))-1
319          !----------------------------------------------------------------------
320          ! Write out results. Note the abnormally long format line is needed as
321          ! the NAG compiler complains if a quoted line is split.
322          !----------------------------------------------------------------------
324          write (unit=trace_unit, fmt='(7A)') &
325             "<TR><TD><a href=", &
326             trim(Documentation_url), &
327             "/", &
328             trim(MasterTimerNames(Pointer)), &    ! Subroutine name
329             ".html>", &
330             trim(MasterTimerNames(Pointer)), &    ! Subroutine name
331             "</a>"
332          write (unit=trace_unit, &
333             fmt='("<TD>",F10.1,2("<TD>",F10.2,"<TD>",F10.1,2("<TD>",F10.1," on",I3)),"<TD>",F5.2)') &
334             real(OverallNoCalls(Pointer))/real(num_procs),                  & ! Number of calls per PE
335             OverallElapsedTime(Pointer)/(real(num_procs*CountRate)),        & ! Average Elapsed Time
336             PercentElapsedTime,                                         & ! Percent Elapsed Time
337             MasterElapsedTime(MinElapsedPos,Pointer)/real(CountRate),   & ! Min average Elapsed Time
338             MinElapsedPos,                                              & ! Which PE
339             MasterElapsedTime(MaxElapsedPos,Pointer)/real(CountRate),   & ! Max average Elapsed Time
340             MaxElapsedPos,                                              & ! Which PE
341             OverallCPUTime(Pointer),                                    & ! CPU time
342             PercentCPUTime,                                             & ! Percent CPU time
343             MasterCPUTime(MinCPUPos,Pointer),                           & ! Min average CPU Time
344             MinCPUPos,                                                  & ! Which PE
345             MasterCPUTime(MaxCPUPos,Pointer),                           & ! Max average CPU Time
346             MaxCPUPos,                                                  & ! Which PE
347             SpeedUp                                                       ! SpeedUp
348          if (trace_csv) then
349             write (unit=trace_csv_unit, &
350                fmt='(2A,F10.1,2(",",F10.2,",",F10.1,2(",",F10.1,",",I3)),",",F5.2)') &
351                '"overall",', &
352                '"'//trim(MasterTimerNames(Pointer))//'",', &
353                real(OverallNoCalls(Pointer))/real(num_procs),                  & ! Number of calls per PE
354                OverallElapsedTime(Pointer)/(real(num_procs*CountRate)),        & ! Average Elapsed Time
355                PercentElapsedTime,                                         & ! Percent Elapsed Time
356                MasterElapsedTime(MinElapsedPos,Pointer)/real(CountRate),   & ! Min average Elapsed Time
357                MinElapsedPos,                                              & ! Which PE
358                MasterElapsedTime(MaxElapsedPos,Pointer)/real(CountRate),   & ! Max average Elapsed Time
359                MaxElapsedPos,                                              & ! Which PE
360                OverallCPUTime(Pointer),                                    & ! CPU time
361                PercentCPUTime,                                             & ! Percent CPU time
362                MasterCPUTime(MinCPUPos,Pointer),                           & ! Min average CPU Time
363                MinCPUPos,                                                  & ! Which PE
364                MasterCPUTime(MaxCPUPos,Pointer),                           & ! Max average CPU Time
365                MaxCPUPos,                                                  & ! Which PE
366                SpeedUp                                                       ! SpeedUp
367          end if
368       end do
370       write (unit=trace_unit, &
371          fmt='(A,I4,A,F8.1,A,F8.1,A)') &
372          "<TR><TD><B>Total</B>",MasterNoRoutines, "</TD><TD><TD><B>", &
373          TotalElapsedTime/real(CountRate), &
374          "</B><TD></TD><TD></TD><TD></TD><TD><B>",TotalCPUTime(1), &
375          "</B><TD></TD><TD></TD><TD></TD>"
376       if (TotalElapsedTime > 0.0) then
377          write (unit=trace_unit, fmt='("<TD><B>",F8.1,"</B>")') &
378             TotalCPUTime(1)/(TotalElapsedTime/real(CountRate))
379       end if
381       write(unit=trace_unit, &
382          fmt='("</TABLE>")')
384       if (trace_csv) then
385          write(unit=trace_csv_unit,fmt=*) " "
386       end if
388    end if ! rootproc
390    !============================================================================
391    ! Sort subroutines into memory use order by max memory on one PE
392    !============================================================================
394    if (trace_memory) then
396 #ifdef DM_PARALLEL
397       do j=1,MaxNoRoutines
398          call da_proc_sum_ints(MasterMaxHeap(:,j))
399       end do
400 #endif
402       if (rootproc) then
403          do j=1,MasterNoRoutines
404             itemp1(j)=MAXVAL(MasterMaxHeap(:,j))
405          end do
407          call da_trace_int_sort(itemp1,MasterNoRoutines,index)
409          write (unit=trace_unit,fmt='("<hr><a name=memory><h2>Maximum memory usage for routines</h2></a>")')
410          write (unit=trace_unit,fmt='("<TABLE BORDER>")')
411          write (unit=trace_unit,fmt='("<TR><TH>Routine<TH>Max in any PE (kbytes)")')
412          write (unit=trace_unit,fmt='("<TH>Overall (kbytes)<TH>Average per PE (kbytes)")')
414          do i=MasterNoRoutines,1,-1
415             Pointer=index(i)
416             write (unit=trace_unit, &
417                fmt='("<TR><TD><a href=",A,"/",A,".html>",A,"</a><TD>",I15,"<TD>",I15,"<TD>",I15)') &
418                trim(Documentation_url),trim(TimerNames(Pointer)),trim(TimerNames(Pointer)), &
419                MAXVAL(MasterMaxHeap(:,Pointer)),sum(MasterMaxHeap(:,Pointer)), &
420                sum(MasterMaxHeap(:,Pointer))/num_procs
421             if (trace_csv) then
422                write (unit=trace_csv_unit, &
423                   fmt='(2A,I15,",",I15,",",I15)') &
424                   '"memory",', &
425                   '"'//trim(TimerNames(Pointer))//'",', &
426                   MAXVAL(MasterMaxHeap(:,Pointer)),sum(MasterMaxHeap(:,Pointer)), &
427                   sum(MasterMaxHeap(:,Pointer))/num_procs
428             end if        
429          end do
430          write (unit=trace_unit,fmt='("</table></body></html>")')
432          if (trace_csv) then
433             write(unit=trace_csv_unit,fmt=*)
434          end if
435       end if
436    end if
438    if (trace_write .AND. trace_unit /= 0) then
439       close(trace_unit)
440    end if
441   
442    if (trace_csv .and. rootproc) then
443       close(trace_csv_unit)
444    end if
446    if (myproc == 0) then
447       deallocate(index)
448    end if
450 end subroutine da_trace_report