Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / lib / libsqlite / tool / spaceanal.tcl
blob093a99a648c4aaa6de4fd49a674909379ea7242b
2 #pragma ident "%Z%%M% %I% %E% SMI"
4 # Run this TCL script using "testfixture" in order get a report that shows
5 # how much disk space is used by a particular data to actually store data
6 # versus how much space is unused.
9 # Get the name of the database to analyze
11 if {[llength $argv]!=1} {
12 puts stderr "Usage: $argv0 database-name"
13 exit 1
15 set file_to_analyze [lindex $argv 0]
16 if {![file exists $file_to_analyze]} {
17 puts stderr "No such file: $file_to_analyze"
18 exit 1
20 if {![file readable $file_to_analyze]} {
21 puts stderr "File is not readable: $file_to_analyze"
22 exit 1
24 if {[file size $file_to_analyze]<2048} {
25 puts stderr "Empty or malformed database: $file_to_analyze"
26 exit 1
29 # Open the database
31 sqlite db [lindex $argv 0]
32 set DB [btree_open [lindex $argv 0]]
34 # In-memory database for collecting statistics
36 sqlite mem :memory:
37 set tabledef\
38 {CREATE TABLE space_used(
39 name clob, -- Name of a table or index in the database file
40 tblname clob, -- Name of associated table
41 is_index boolean, -- TRUE if it is an index, false for a table
42 nentry int, -- Number of entries in the BTree
43 payload int, -- Total amount of data stored in this table or index
44 mx_payload int, -- Maximum payload size
45 n_ovfl int, -- Number of entries that overflow
46 pri_pages int, -- Number of primary pages used
47 ovfl_pages int, -- Number of overflow pages used
48 pri_unused int, -- Number of unused bytes on primary pages
49 ovfl_unused int -- Number of unused bytes on overflow pages
50 );}
51 mem eval $tabledef
53 # This query will be used to find the root page number for every index and
54 # table in the database.
56 set sql {
57 SELECT name, tbl_name, type, rootpage
58 FROM sqlite_master WHERE type IN ('table','index')
59 UNION ALL
60 SELECT 'sqlite_master', 'sqlite_master', 'table', 2
61 ORDER BY 1
64 # Analyze every table in the database, one at a time.
66 foreach {name tblname type rootpage} [db eval $sql] {
67 puts stderr "Analyzing $name..."
68 set cursor [btree_cursor $DB $rootpage 0]
69 set go [btree_first $cursor]
70 set size 0
71 catch {unset pg_used}
72 set unused_ovfl 0
73 set n_overflow 0
74 set cnt_ovfl 0
75 set n_entry 0
76 set mx_size 0
77 set pg_used($rootpage) 1016
78 while {$go==0} {
79 incr n_entry
80 set payload [btree_payload_size $cursor]
81 incr size $payload
82 set stat [btree_cursor_dump $cursor]
83 set pgno [lindex $stat 0]
84 set freebytes [lindex $stat 4]
85 set pg_used($pgno) $freebytes
86 if {$payload>236} {
87 # if {[lindex $stat 8]==0} {error "overflow is empty with $payload"}
88 set n [expr {($payload-236+1019)/1020}]
89 incr n_overflow $n
90 incr cnt_ovfl
91 incr unused_ovfl [expr {$n*1020+236-$payload}]
92 } else {
93 # if {[lindex $stat 8]!=0} {error "overflow not empty with $payload"}
95 if {$payload>$mx_size} {set mx_size $payload}
96 set go [btree_next $cursor]
98 btree_close_cursor $cursor
99 set n_primary [llength [array names pg_used]]
100 set unused_primary 0
101 foreach x [array names pg_used] {incr unused_primary $pg_used($x)}
102 regsub -all ' $name '' name
103 set sql "INSERT INTO space_used VALUES('$name'"
104 regsub -all ' $tblname '' tblname
105 append sql ",'$tblname',[expr {$type=="index"}],$n_entry"
106 append sql ",$size,$mx_size,$cnt_ovfl,"
107 append sql "$n_primary,$n_overflow,$unused_primary,$unused_ovfl);"
108 mem eval $sql
111 # Generate a single line of output in the statistics section of the
112 # report.
114 proc statline {title value {extra {}}} {
115 set len [string length $title]
116 set dots [string range {......................................} $len end]
117 set len [string length $value]
118 set sp2 [string range { } $len end]
119 if {$extra ne ""} {
120 set extra " $extra"
122 puts "$title$dots $value$sp2$extra"
125 # Generate a formatted percentage value for $num/$denom
127 proc percent {num denom} {
128 if {$denom==0.0} {return ""}
129 set v [expr {$num*100.0/$denom}]
130 if {$v>1.0 && $v<99.0} {
131 return [format %4.1f%% $v]
132 } elseif {$v<0.1 || $v>99.9} {
133 return [format %6.3f%% $v]
134 } else {
135 return [format %5.2f%% $v]
139 # Generate a subreport that covers some subset of the database.
140 # the $where clause determines which subset to analyze.
142 proc subreport {title where} {
143 set hit 0
144 mem eval "SELECT sum(nentry) AS nentry, \
145 sum(payload) AS payload, \
146 sum(CASE is_index WHEN 1 THEN 0 ELSE payload-4*nentry END) \
147 AS data, \
148 max(mx_payload) AS mx_payload, \
149 sum(n_ovfl) as n_ovfl, \
150 sum(pri_pages) AS pri_pages, \
151 sum(ovfl_pages) AS ovfl_pages, \
152 sum(pri_unused) AS pri_unused, \
153 sum(ovfl_unused) AS ovfl_unused \
154 FROM space_used WHERE $where" {} {set hit 1}
155 if {!$hit} {return 0}
156 puts ""
157 set len [string length $title]
158 incr len 5
159 set stars "***********************************"
160 append stars $stars
161 set stars [string range $stars $len end]
162 puts "*** $title $stars"
163 puts ""
164 statline "Percentage of total database" \
165 [percent [expr {$pri_pages+$ovfl_pages}] $::file_pgcnt]
166 statline "Number of entries" $nentry
167 set storage [expr {($pri_pages+$ovfl_pages)*1024}]
168 statline "Bytes of storage consumed" $storage
169 statline "Bytes of payload" $payload [percent $payload $storage]
170 statline "Bytes of data" $data [percent $data $storage]
171 set key [expr {$payload-$data}]
172 statline "Bytes of key" $key [percent $key $storage]
173 set avgpay [expr {$nentry>0?$payload/$nentry:0}]
174 statline "Average payload per entry" $avgpay
175 set avgunused [expr {$nentry>0?($pri_unused+$ovfl_unused)/$nentry:0}]
176 statline "Average unused bytes per entry" $avgunused
177 statline "Average fanout" \
178 [format %.2f [expr {$pri_pages==0?0:($nentry+0.0)/$pri_pages}]]
179 statline "Maximum payload per entry" $mx_payload
180 statline "Entries that use overflow" $n_ovfl [percent $n_ovfl $nentry]
181 statline "Total pages used" [set allpgs [expr {$pri_pages+$ovfl_pages}]]
182 statline "Primary pages used" $pri_pages ;# [percent $pri_pages $allpgs]
183 statline "Overflow pages used" $ovfl_pages ;# [percent $ovfl_pages $allpgs]
184 statline "Unused bytes on primary pages" $pri_unused \
185 [percent $pri_unused [expr {$pri_pages*1024}]]
186 statline "Unused bytes on overflow pages" $ovfl_unused \
187 [percent $ovfl_unused [expr {$ovfl_pages*1024}]]
188 set allunused [expr {$ovfl_unused+$pri_unused}]
189 statline "Unused bytes on all pages" $allunused \
190 [percent $allunused [expr {$allpgs*1024}]]
191 return 1
194 # Output summary statistics:
196 puts "/** Disk-Space Utilization Report For $file_to_analyze"
197 puts "*** As of [clock format [clock seconds] -format {%Y-%b-%d %H:%M:%S}]"
198 puts ""
199 set fsize [file size [lindex $argv 0]]
200 set file_pgcnt [expr {$fsize/1024}]
201 set usedcnt [mem eval {SELECT sum(pri_pages+ovfl_pages) FROM space_used}]
202 set freecnt [expr {$file_pgcnt-$usedcnt-1}]
203 set freecnt2 [lindex [btree_get_meta $DB] 0]
204 statline {Pages in the whole file (measured)} $file_pgcnt
205 set file_pgcnt2 [expr {$usedcnt+$freecnt2+1}]
206 statline {Pages in the whole file (calculated)} $file_pgcnt2
207 statline {Pages that store data} $usedcnt [percent $usedcnt $file_pgcnt]
208 statline {Pages on the freelist (per header)}\
209 $freecnt2 [percent $freecnt2 $file_pgcnt]
210 statline {Pages on the freelist (calculated)}\
211 $freecnt [percent $freecnt $file_pgcnt]
212 statline {Header pages} 1 [percent 1 $file_pgcnt]
214 set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}]
215 statline {Number of tables in the database} $ntable
216 set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}]
217 set autoindex [db eval {SELECT count(*) FROM sqlite_master
218 WHERE type='index' AND name LIKE '(% autoindex %)'}]
219 set manindex [expr {$nindex-$autoindex}]
220 statline {Number of indices} $nindex
221 statline {Number of named indices} $manindex [percent $manindex $nindex]
222 statline {Automatically generated indices} $autoindex \
223 [percent $autoindex $nindex]
225 set bytes_data [mem eval "SELECT sum(payload-4*nentry) FROM space_used
226 WHERE NOT is_index AND name!='sqlite_master'"]
227 set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
228 statline "Size of the file in bytes" $fsize
229 statline "Bytes of payload stored" $total_payload \
230 [percent $total_payload $fsize]
231 statline "Bytes of user data stored" $bytes_data \
232 [percent $bytes_data $fsize]
234 # Output table rankings
236 puts ""
237 puts "*** Page counts for all tables with their indices ********************"
238 puts ""
239 mem eval {SELECT tblname, count(*) AS cnt, sum(pri_pages+ovfl_pages) AS size
240 FROM space_used GROUP BY tblname ORDER BY size DESC, tblname} {} {
241 statline [string toupper $tblname] $size [percent $size $file_pgcnt]
244 # Output subreports
246 if {$nindex>0} {
247 subreport {All tables and indices} 1
249 subreport {All tables} {NOT is_index}
250 if {$nindex>0} {
251 subreport {All indices} {is_index}
253 foreach tbl [mem eval {SELECT name FROM space_used WHERE NOT is_index
254 ORDER BY name}] {
255 regsub ' $tbl '' qn
256 set name [string toupper $tbl]
257 set n [mem eval "SELECT count(*) FROM space_used WHERE tblname='$qn'"]
258 if {$n>1} {
259 subreport "Table $name and all its indices" "tblname='$qn'"
260 subreport "Table $name w/o any indices" "name='$qn'"
261 subreport "Indices of table $name" "tblname='$qn' AND is_index"
262 } else {
263 subreport "Table $name" "name='$qn'"
267 # Output instructions on what the numbers above mean.
269 puts {
270 *** Definitions ******************************************************
272 Number of pages in the whole file
274 The number of 1024-byte pages that go into forming the complete database
276 Pages that store data
278 The number of pages that store data, either as primary B*Tree pages or
279 as overflow pages. The number at the right is the data pages divided by
280 the total number of pages in the file.
282 Pages on the freelist
284 The number of pages that are not currently in use but are reserved for
285 future use. The percentage at the right is the number of freelist pages
286 divided by the total number of pages in the file.
288 Header pages
290 The number of pages of header overhead in the database. This value is
291 always 1. The percentage at the right is the number of header pages
292 divided by the total number of pages in the file.
294 Number of tables in the database
296 The number of tables in the database, including the SQLITE_MASTER table
297 used to store schema information.
299 Number of indices
301 The total number of indices in the database.
303 Number of named indices
305 The number of indices created using an explicit CREATE INDEX statement.
307 Automatically generated indices
309 The number of indices used to implement PRIMARY KEY or UNIQUE constraints
310 on tables.
312 Size of the file in bytes
314 The total amount of disk space used by the entire database files.
316 Bytes of payload stored
318 The total number of bytes of payload stored in the database. Payload
319 includes both key and data. The content of the SQLITE_MASTER table is
320 counted when computing this number. The percentage at the right shows
321 the payload divided by the total file size.
323 Bytes of user data stored
325 The total number of bytes of data stored in the database, not counting
326 the database schema information stored in the SQLITE_MASTER table. The
327 percentage at the right is the user data size divided by the total file
328 size.
330 Percentage of total database
332 The amount of the complete database file that is devoted to storing
333 information described by this category.
335 Number of entries
337 The total number of B*Tree key/value pairs stored under this category.
339 Bytes of storage consumed
341 The total amount of disk space required to store all B*Tree entries
342 under this category. The is the total number of pages used times
343 the pages size (1024).
345 Bytes of payload
347 The amount of payload stored under this category. Payload is the sum
348 of keys and data. Each table entry has 4 bytes of key and an arbitrary
349 amount of data. Each index entry has 4 or more bytes of key and no
350 data. The percentage at the right is the bytes of payload divided by
351 the bytes of storage consumed.
353 Bytes of data
355 The amount of data stored under this category. The data space reported
356 includes formatting information such as nul-terminators and field-lengths
357 that are stored with the data. The percentage at the right is the bytes
358 of data divided by bytes of storage consumed.
360 Bytes of key
362 The sum of the sizes of all keys under this category. The percentage at
363 the right is the bytes of key divided by the bytes of storage consumed.
365 Average payload per entry
367 The average amount of payload on each entry. This is just the bytes of
368 payload divided by the number of entries.
370 Average unused bytes per entry
372 The average amount of free space remaining on all pages under this
373 category on a per-entry basis. This is the number of unused bytes on
374 all pages divided by the number of entries.
376 Maximum payload per entry
378 The largest payload size of any entry.
380 Entries that use overflow
382 Up to 236 bytes of payload for each entry are stored directly in the
383 primary B*Tree page. Any additional payload is stored on a linked list
384 of overflow pages. This is the number of entries that exceed 236 bytes
385 in size. The value to the right is the number of entries that overflow
386 divided by the total number of entries.
388 Total pages used
390 This is the number of 1024 byte pages used to hold all information in
391 the current category. This is the sum of primary and overflow pages.
393 Primary pages used
395 This is the number of primary B*Tree pages used.
397 Overflow pages used
399 The total number of overflow pages used for this category.
401 Unused bytes on primary pages
403 The total number of bytes of unused space on all primary pages. The
404 percentage at the right is the number of unused bytes divided by the
405 total number of bytes on primary pages.
407 Unused bytes on overflow pages
409 The total number of bytes of unused space on all overflow pages. The
410 percentage at the right is the number of unused bytes divided by the
411 total number of bytes on overflow pages.
413 Unused bytes on all pages
415 The total number of bytes of unused space on all primary and overflow
416 pages. The percentage at the right is the number of unused bytes
417 divided by the total number of bytes.
420 # Output the database
422 puts "**********************************************************************"
423 puts "The entire text of this report can be sourced into any SQL database"
424 puts "engine for further analysis. All of the text above is an SQL comment."
425 puts "The data used to generate this report follows:"
426 puts "*/"
427 puts "BEGIN;"
428 puts $tabledef
429 unset -nocomplain x
430 mem eval {SELECT * FROM space_used} x {
431 puts -nonewline "INSERT INTO space_used VALUES("
432 regsub ' $x(name) '' qn
433 regsub ' $x(tblname) '' qtn
434 puts -nonewline "'$qn','$qtn',"
435 puts -nonewline "$x(is_index),$x(nentry),$x(payload),$x(mx_payload),"
436 puts -nonewline "$x(n_ovfl),$x(pri_pages),$x(ovfl_pages),$x(pri_unused),"
437 puts "$x(ovfl_unused));"
439 puts "COMMIT;"