########################
# Function definitions #
########################
# Load the helper functions that live next to this script. "$0" is quoted so
# that the lookup also works when the script path contains whitespace.
source "$(dirname "$0")/measurement-functions"
#######################################
# Measure one SPLASH-2 program natively and under several Valgrind tools.
#
# The program is first run natively with the arguments -p1, -p2 and -p4. For
# each run the four values produced by "measure_runtime ... | avgstddev"
# (average and deviation of the run time, average and deviation of the
# virtual memory size) are printed. The -p1 and -p4 values are then handed to
# print_runtime_ratio together with the Valgrind command line to compare
# against: --tool=none (-p1 and -p4), DRD with --check-stack-var=yes and =no,
# and Helgrind with --history-level=none, approx and full (all with -p4).
#
# measure_runtime, avgstddev and print_runtime_ratio are not defined here;
# presumably they are provided by the sourced measurement-functions file.
#
# Globals:
#   VG         (read) Valgrind launcher to run.
#   psep       (read) Text inserted between "-p" and the count. The option is
#              expanded unquoted on purpose: with psep=' ' it splits into
#              two words ("-p" "4"), with an empty psep it stays one ("-p4").
#   test_args  (read) Passed as the last program argument, always as exactly
#              one word (even when empty).
# Arguments:
#   $1    Program to run; also used as prefix of the *.out file names.
#   $2..  Additional program arguments.
# Outputs:
#   Results on stdout, "Internal error" diagnostics on stderr.
# Returns:
#   Exits the shell with status 1 when avgstddev yields more than four fields
#   or when the scratch file cannot be created.
#######################################
run_test() {
  local tmp p rest
  local avg stddev vsz vszdev
  local -a native1=() native4=()

  # Private scratch file rather than a predictable /tmp/test-timing.$$ name.
  tmp=$(mktemp "${TMPDIR:-/tmp}/test-timing.XXXXXX") || exit 1

  # Native runs.
  for p in 1 2 4; do
    # -p${psep}${p} is intentionally unquoted, see "psep" above.
    test_output="${1}-p${p}.out" \
      measure_runtime "$@" -p${psep}${p} "${test_args}" | avgstddev > "$tmp"
    read -r avg stddev vsz vszdev rest < "$tmp"
    echo "Average time: ${avg} +/- ${stddev} seconds." \
         " VSZ: ${vsz} +/- ${vszdev} KB"

    # Anything beyond four fields means the statistics output is malformed.
    if [ "${rest}" != "" ]; then
      echo "Internal error ($rest)" >&2
      rm -f "$tmp"
      exit 1
    fi

    # Only the -p1 and -p4 figures serve as baselines below.
    case "$p" in
      1) native1=("$avg" "$stddev" "$vsz" "$vszdev") ;;
      4) native4=("$avg" "$stddev" "$vsz" "$vszdev") ;;
    esac
  done

  # --tool=none, compared against the native -p1 run.
  p=1
  test_output="/dev/null" \
    print_runtime_ratio "${native1[@]}" \
    "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

  # Everything below is compared against the native -p4 run.
  p=4
  test_output="/dev/null" \
    print_runtime_ratio "${native4[@]}" \
    "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-drd-with-stack-var-4.out" \
    print_runtime_ratio "${native4[@]}" \
    "$VG" --tool=drd --first-race-only=yes --check-stack-var=yes \
    --drd-stats=yes "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-drd-without-stack-var-4.out" \
    print_runtime_ratio "${native4[@]}" \
    "$VG" --tool=drd --first-race-only=yes --check-stack-var=no \
    --drd-stats=yes "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-helgrind-4-none.out" \
    print_runtime_ratio "${native4[@]}" \
    "$VG" --tool=helgrind --history-level=none "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-helgrind-4-approx.out" \
    print_runtime_ratio "${native4[@]}" \
    "$VG" --tool=helgrind --history-level=approx "$@" -p${psep}${p} "${test_args}"

  test_output="${1}-helgrind-4-full.out" \
    print_runtime_ratio "${native4[@]}" \
    "$VG" --tool=helgrind --history-level=full "$@" -p${psep}${p} "${test_args}"

  # NOTE(review): terminates the row of ratios; assumes print_runtime_ratio
  # does not emit a trailing newline itself - confirm against
  # measurement-functions.
  echo ''

  rm -f "$tmp"
}
########################
# Script body          #
########################
# Directory containing this script, made absolute so that it stays valid
# after later directory changes.
DRD_SCRIPTS_DIR="$(dirname "$0")"
if [ "${DRD_SCRIPTS_DIR:0:1}" != "/" ]; then
  DRD_SCRIPTS_DIR="$PWD/$DRD_SCRIPTS_DIR"
fi

# Location of the SPLASH-2 tree; nothing can be run without it.
SPLASH2="${DRD_SCRIPTS_DIR}/../splash2"
if [ ! -e "${SPLASH2}" ]; then
  echo "Error: splash2 directory not found (${SPLASH2})." >&2
  exit 1
fi

# Valgrind launcher: honour a caller-supplied $VG, otherwise fall back to the
# in-tree vg-in-place script.
if [ "$VG" = "" ]; then
  VG="${DRD_SCRIPTS_DIR}/../../vg-in-place"
fi

if [ ! -e "$VG" ]; then
  echo "Could not find $VG." >&2
  exit 1
fi
####################################################################################################
# Meaning of the different columns:
#  1. SPLASH2 test name.
#  2. Execution time in seconds for native run with argument -p1.
#  3. Virtual memory size in KB for the native run with argument -p1.
#  4. Execution time in seconds for native run with argument -p2.
#  5. Virtual memory size in KB for the native run with argument -p2.
#  6. Execution time in seconds for native run with argument -p4.
#  7. Virtual memory size in KB for the native run with argument -p4.
#  8. Execution time ratio for --tool=none -p1 versus -p1.
#  9. Virtual memory size ratio for --tool=none -p1 versus -p1.
# 10. Execution time ratio for --tool=none -p4 versus -p4.
# 11. Virtual memory size ratio for --tool=none -p4 versus -p4.
# 12. Execution time ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
# 13. Virtual memory size ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
# 14. Execution time ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
# 15. Virtual memory size ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
# 16. Execution time ratio for --tool=helgrind --history-level=none -p4 versus -p4.
# 17. Virtual memory size ratio for --tool=helgrind --history-level=none -p4 versus -p4.
# 18. Execution time ratio for --tool=helgrind --history-level=approx -p4 versus -p4.
# 19. Virtual memory size ratio for --tool=helgrind --history-level=approx -p4 versus -p4.
# 20. Execution time ratio for --tool=helgrind --history-level=full -p4 versus -p4.
# 21. Virtual memory size ratio for --tool=helgrind --history-level=full -p4 versus -p4.
# 22. Execution time ratio for Intel Thread Checker -p4 versus -p4.
# 23. Execution time ratio for Intel Thread Checker with filtering (+f) -p4 versus -p4.
# - Both Helgrind and DRD use a granularity of one byte for data race detection.
# - Helgrind does detect data races on stack variables. DRD only detects
#   data races on stack variables with --check-stack-var=yes.
# - The ITC tests have been run on a 4-way 2.5 GHz Pentium 4 workstation, most
#   likely running a 32-bit OS. Not yet clear to me: which OS? Which
#   granularity does ITC use? And which m4 macros have been used by ITC as
#   implementation of the synchronization primitives?
# 1                      2      3     4      5     6      7     8     9    10    11    12    13    14    15    16    17    18    19    20    21    22    23
###########################################################################################################################################################
# Results:                 native       native       native        none        none         DRD         DRD          HG          HG          HG   ITC   ITC
#                             -p1          -p2          -p4         -p1         -p4         -p4       -p4+f         -p4         -p4         -p4   -p4 -p4+f
# .........................................................................................................................................................
# Cholesky            0.11  12016  0.06  22016  0.55  41328  10.3  4.92   1.7  2.14    15  2.61     8  2.61    10  3.96    10  3.96    15  6.14   239    82
# FFT                 0.02   6692  0.02  14888  0.02  31621  17.0  8.01  20.0  2.48   114  3.15    64  3.28    81  4.52    81  4.52   116  5.56    90    41
# LU, contiguous      0.08   4100  0.05  12304  0.06  28712  11.1 12.44  18.5  2.64   104  3.18    70  3.18    87  4.84    89  4.84   118  5.55   428   128
# Ocean, contiguous   0.23  16848  0.19  25384  0.23  42528   6.3  3.78   8.3  2.11    87  2.82    62  4.02    71  3.75    71  3.75   195  5.96    90    28
# Radix               0.21  15136  0.14  23336  0.15  39728  12.6  4.10  22.3  2.19    61  2.87    41  2.94    52  4.03    52  4.03    85  6.13   222    56
# Raytrace            0.63 207104  0.49 215296  0.49 231680   8.9  1.23  12.9  1.20   385  1.38    86  2.10   158  3.70   160  3.70   222  4.15   172    53
# Water-n2            0.18  10696  0.09  27072  0.11  59832  12.5  5.46  26.7  1.80  3092  3.03   263  3.06    92  3.28    92  3.28    92  3.55   189    39
# Water-sp            0.20   4444  0.15  13536  0.10  30269  10.6 11.56  27.0  2.52   405  3.29    69  3.42    95  4.59    95  4.59    97  4.73   183    34
# .........................................................................................................................................................
# geometric mean      0.14  13024  0.10  25669  0.14  47655  10.8  5.26  13.5  2.08   161  2.71    59  3.03    66  4.05    66  4.05    95  5.13   180    51
# .........................................................................................................................................................
# Hardware: dual-core Intel Core2 Duo E6750, 2.66 GHz, 4 MB L2 cache, 2 GB RAM.
# Software: openSUSE 11.0 (64-bit edition), runlevel 3, kernel 2.6.30.1, gcc 4.3.1, 32 bit SPLASH-2 executables, valgrind trunk r10648.
###########################################################################################################################################################
# - The ITC performance numbers in the above table originate from table 1 in
#   the following paper:
#   Paul Sack, Brian E. Bliss, Zhiqiang Ma, Paul Petersen, Josep Torrellas,
#   Accurate and efficient filtering for the Intel thread checker race
#   detector, Proceedings of the 1st workshop on Architectural and system
#   support for improving software dependability, San Jose, California,
#   2006. Pages: 34 - 41.
# - The input parameters for benchmarks below originate from table 1 in the
#   following paper:
#   The SPLASH-2 programs: characterization and methodological considerations
#   Woo, S.C.; Ohara, M.; Torrie, E.; Singh, J.P.; Gupta, A.
#   1995. Proceedings of the 22nd Annual International Symposium on Computer
#   Architecture, 22-24 Jun 1995, Page(s): 24 - 36.
#   ftp://www-flash.stanford.edu/pub/splash2/splash2_isca95.ps.Z
# Values used to size the benchmark inputs below. get_cache_size and log2 are
# not defined in this file; presumably they come from measurement-functions.
cache_size=$(get_cache_size)
log2_cache_size=$(log2 "${cache_size}")
# Cholesky. Runs in a subshell so that the directory change stays local.
(
  cd "${SPLASH2}/codes/kernels/cholesky/inputs" || exit 1
  # Unpack every compressed input file next to its archive.
  # NOTE(review): the loop header is reconstructed from the use of $f and
  # ${f%.Z} - confirm the glob against the upstream script.
  for f in *.Z; do
    [ -e "$f" ] || continue # pattern matched nothing
    gzip -cd <"$f" >"${f%.Z}"
  done
  test_args=tk15.O run_test ../CHOLESKY -C$((cache_size))
)
# FFT. The -l argument is half of log2_cache_size (integer division).
run_test "${SPLASH2}/codes/kernels/fft/FFT" -t -l$((log2_cache_size/2)) -m16
# LU, contiguous blocks.
run_test "${SPLASH2}/codes/kernels/lu/contiguous_blocks/LU" -n512

# LU, non-contiguous blocks.
#run_test ${SPLASH2}/codes/kernels/lu/non_contiguous_blocks/LU -n512
# Ocean, contiguous partitions (the non-contiguous variant is disabled).
run_test "${SPLASH2}/codes/apps/ocean/contiguous_partitions/OCEAN" -n258
#run_test ${SPLASH2}/codes/apps/ocean/non_contiguous_partitions/OCEAN -n258
# Radiosity. Runs fine on a 32-bit OS, but deadlocks on a 64-bit OS. Not clear to me why.
# The command substitution is quoted so that an empty "uname -p" result
# cannot break the test expression. psep=' ' makes run_test pass the -p
# option and its value as two separate words.
if [ "$(uname -p)" = "i686" ]; then
  psep=' ' run_test "${SPLASH2}/codes/apps/radiosity/RADIOSITY" \
    -batch -room -ae 5000.0 -en 0.050 -bf 0.10
fi
# Radix, with -n set to 2**20 and -r1024.
run_test "${SPLASH2}/codes/kernels/radix/RADIX" -n$((2**20)) -r1024
# Raytrace. Runs in a subshell so that the directory changes stay local.
(
  cd "${SPLASH2}/codes/apps/raytrace/inputs" || exit 1
  # Remove previously unpacked inputs, then unpack them again.
  rm -f -- *.env *.geo *.rl
  # NOTE(review): the loop header is reconstructed from the use of $f and
  # ${f%.Z} - confirm the glob against the upstream script.
  for f in *.Z; do
    [ -e "$f" ] || continue # pattern matched nothing
    gzip -cd <"$f" >"${f%.Z}"
  done
  # NOTE(review): both the binary (./RAYTRACE) and the input (inputs/car.env)
  # are named relative to the parent directory, so the run has to start there.
  cd .. || exit 1
  test_args=inputs/car.env psep='' run_test ./RAYTRACE -m64
)
# Water-n2. Runs in a subshell so that the directory change stays local.
# test_input names a file next to this script; presumably the measurement
# helpers feed it to the program - confirm against measurement-functions.
(
  cd "${SPLASH2}/codes/apps/water-nsquared" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' \
    run_test ./WATER-NSQUARED
)
# Water-sp. Runs in a subshell so that the directory change stays local.
# test_input names a file next to this script; presumably the measurement
# helpers feed it to the program - confirm against measurement-functions.
(
  cd "${SPLASH2}/codes/apps/water-spatial" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' \
    run_test ./WATER-SPATIAL
)
# Local variables:
# compile-command: "./run-splash2"
# End: