Merge branch 'maint'
[hoomd-blue.git] / libhoomd / utils / Profiler.cc
bloba56c7f5903d2829c032b6bc611407a2ed9b64dae
1 /*
2 Highly Optimized Object-oriented Many-particle Dynamics -- Blue Edition
3 (HOOMD-blue) Open Source Software License Copyright 2009-2014 The Regents of
4 the University of Michigan All rights reserved.
6 HOOMD-blue may contain modifications ("Contributions") provided, and to which
7 copyright is held, by various Contributors who have granted The Regents of the
8 University of Michigan the right to modify and/or distribute such Contributions.
10 You may redistribute, use, and create derivate works of HOOMD-blue, in source
11 and binary forms, provided you abide by the following conditions:
13 * Redistributions of source code must retain the above copyright notice, this
14 list of conditions, and the following disclaimer both in the code and
15 prominently in any materials provided with the distribution.
17 * Redistributions in binary form must reproduce the above copyright notice, this
18 list of conditions, and the following disclaimer in the documentation and/or
19 other materials provided with the distribution.
21 * All publications and presentations based on HOOMD-blue, including any reports
22 or published results obtained, in whole or in part, with HOOMD-blue, will
23 acknowledge its use according to the terms posted at the time of submission on:
24 http://codeblue.umich.edu/hoomd-blue/citations.html
26 * Any electronic documents citing HOOMD-Blue will link to the HOOMD-Blue website:
27 http://codeblue.umich.edu/hoomd-blue/
29 * Apart from the above required attributions, neither the name of the copyright
30 holder nor the names of HOOMD-blue's contributors may be used to endorse or
31 promote products derived from this software without specific prior written
32 permission.
34 Disclaimer
36 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS ``AS IS'' AND
37 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
38 WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND/OR ANY
39 WARRANTIES THAT THIS SOFTWARE IS FREE OF INFRINGEMENT ARE DISCLAIMED.
41 IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
42 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
43 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
45 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
46 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50 // Maintainer: joaander
/*! \file Profiler.cc
    \brief Defines the Profiler class
*/
56 #include <iomanip>
57 #include <sstream>
59 #ifdef WIN32
60 #pragma warning( push )
61 #pragma warning( disable : 4103 4244 )
62 #endif
64 #include "Profiler.h"
66 #include <boost/python.hpp>
67 using namespace boost::python;
68 using namespace std;
70 ////////////////////////////////////////////////////
71 // ProfileDataElem members
72 int64_t ProfileDataElem::getChildElapsedTime() const
74 // start counting the elapsed time from our time
75 int64_t total = 0;
77 // for each of the children
78 map<string, ProfileDataElem>::const_iterator i;
79 for (i = m_children.begin(); i != m_children.end(); ++i)
81 // add their time
82 total += (*i).second.m_elapsed_time;
85 // return the total
86 return total;
88 int64_t ProfileDataElem::getTotalFlopCount() const
90 // start counting the elapsed time from our time
91 int64_t total = m_flop_count;
93 // for each of the children
94 map<string, ProfileDataElem>::const_iterator i;
95 for (i = m_children.begin(); i != m_children.end(); ++i)
97 // add their time
98 total += (*i).second.getTotalFlopCount();
101 // return the total
102 return total;
104 int64_t ProfileDataElem::getTotalMemByteCount() const
106 // start counting the elapsed time from our time
107 int64_t total = m_mem_byte_count;
109 // for each of the children
110 map<string, ProfileDataElem>::const_iterator i;
111 for (i = m_children.begin(); i != m_children.end(); ++i)
113 // add their time
114 total += (*i).second.getTotalMemByteCount();
117 // return the total
118 return total;
121 /*! Recursive output routine to write results from this profile node and all sub nodes printed in
122 a tree.
123 \param o stream to write output to
124 \param name Name of the node
125 \param tab_level Current number of tabs in the tree
126 \param total_time Total number of nanoseconds taken by this node
127 \param name_width Maximum name width for all siblings of this node (used to align output columns)
129 void ProfileDataElem::output(std::ostream &o, const std::string& name, int tab_level, int64_t total_time, int name_width) const
131 // create a tab string to output for the current tab level
132 string tabs = "";
133 for (int i = 0; i < tab_level; i++)
134 tabs += " ";
136 o << tabs;
137 // start with an overview
138 // initial tests determined that having a parent node calculate the avg gflops of its
139 // children is annoying, so default to 0 flops&bytes unless we are a leaf
140 double sec = double(m_elapsed_time)/1e9;
141 double perc = double(m_elapsed_time)/double(total_time) * 100.0;
142 double flops = 0.0;
143 double bytes = 0.0;
144 if (m_children.size() == 0)
146 flops = double(getTotalFlopCount())/sec;
147 bytes = double(getTotalMemByteCount())/sec;
150 output_line(o, name, sec, perc, flops, bytes, name_width);
152 // start by determining the name width
153 map<string, ProfileDataElem>::const_iterator i;
155 // it has at least to be four letters wide ("Self")
156 int child_max_width = 4;
157 for (i = m_children.begin(); i != m_children.end(); ++i)
159 int child_width = (int)(*i).first.size();
160 if (child_width > child_max_width)
161 child_max_width = child_width;
164 // output each of the children
165 for (i = m_children.begin(); i != m_children.end(); ++i)
167 (*i).second.output(o, (*i).first, tab_level+1, total_time, child_max_width);
170 // output an "Self" item to account for time actually spent in this data elem
171 if (m_children.size() > 0)
173 double sec = double(m_elapsed_time - getChildElapsedTime())/1e9;
174 double perc = double(m_elapsed_time - getChildElapsedTime())/double(total_time) * 100.0;
175 double flops = double(m_flop_count)/sec;
176 double bytes = double(m_mem_byte_count)/sec;
178 // don't print Self unless perc is significant
179 if (perc >= 0.1)
181 o << tabs << " ";
182 output_line(o, "Self", sec, perc, flops, bytes, child_max_width);
187 void ProfileDataElem::output_line(std::ostream &o,
188 const std::string &name,
189 double sec,
190 double perc,
191 double flops,
192 double bytes,
193 unsigned int name_width) const
195 o << setiosflags(ios::fixed);
197 o << name << ": ";
198 assert(name_width >= name.size());
199 for (unsigned int i = 0; i < name_width - name.size(); i++)
200 o << " ";
202 o << setw(7) << setprecision(4) << sec << "s";
203 o << " | " << setprecision(3) << setw(6) << perc << "% ";
205 //If sec is zero, the values to be printed are garbage. Thus, we skip it all together.
206 if (sec == 0)
208 o << "n/a" << endl;
209 return;
212 o << setprecision(5);
213 // output flops with intelligent units
214 if (flops > 0)
216 o << setw(6);
217 if (flops < 1e6)
218 o << flops << " FLOP/s ";
219 else if (flops < 1e9)
220 o << flops/1e6 << " MFLOP/s ";
221 else
222 o << flops/1e9 << " GFLOP/s ";
225 //output bytes/s with intelligent units
226 if (bytes > 0)
228 o << setw(6);
229 if (bytes < 1e6)
230 o << bytes << " B/s ";
231 else if (bytes < 1e9)
232 o << bytes/1e6 << " MiB/s ";
233 else
234 o << bytes/1e9 << " GiB/s ";
237 o << endl;
240 ////////////////////////////////////////////////////////////////////
241 // Profiler
243 Profiler::Profiler(const std::string& name) : m_name(name)
245 // push the root onto the top of the stack so that it is the default
246 m_stack.push(&m_root);
248 // record the start of this profile
249 m_root.m_start_time = m_clk.getTime();
251 #ifdef SCOREP_USER_ENABLE
252 SCOREP_USER_REGION_BEGIN(m_root.m_scorep_region, name.c_str(),SCOREP_USER_REGION_TYPE_COMMON )
253 #endif
256 void Profiler::output(std::ostream &o)
258 // perform a sanity check, but don't bail out
259 if (m_stack.top() != &m_root)
261 o << "***Warning! Outputting a profile with incomplete samples" << endl;
264 #ifdef SCOREP_USER_ENABLE
265 SCOREP_USER_REGION_END( m_root.m_scorep_region )
266 #endif
268 // outputting a profile implicitly calls for a time sample
269 m_root.m_elapsed_time = m_clk.getTime() - m_root.m_start_time;
271 // startup the recursive output process
272 m_root.output(o, m_name, 0, m_root.m_elapsed_time, (int)m_name.size());
275 /*! \param o Stream to output to
276 \param prof Profiler to print
278 std::ostream& operator<<(ostream &o, Profiler& prof)
280 prof.output(o);
282 return o;
285 //! Helper function to get the formatted output of a Profiler in python
286 /*! Outputs the profiler timings to a string
287 \param prof Profiler to generate output from
289 string print_profiler(Profiler *prof)
291 assert(prof);
292 ostringstream s;
293 s << *prof;
294 return s.str();
297 void export_Profiler()
299 class_<Profiler>("Profiler", init<const std::string&>())
300 .def("__str__", &print_profiler)
304 #ifdef WIN32
305 #pragma warning( pop )
306 #endif