2 # -*- Mode: perl; indent-tabs-mode: nil -*-
4 # The contents of this file are subject to the Mozilla Public
5 # License Version 1.1 (the "License"); you may not use this file
6 # except in compliance with the License. You may obtain a copy of
7 # the License at http://www.mozilla.org/MPL/
9 # Software distributed under the License is distributed on an "AS
10 # IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
11 # implied. See the License for the specific language governing
12 # rights and limitations under the License.
14 # The Original Code is the Bugzilla Bug Tracking System.
16 # The Initial Developer of the Original Code is Netscape Communications
17 # Corporation. Portions created by Netscape are
18 # Copyright (C) 1998 Netscape Communications Corporation. All
21 # Contributor(s): Terry Weissman <terry@mozilla.org>,
22 # Harrison Page <harrison@netscape.com>
23 # Gervase Markham <gerv@gerv.net>
24 # Richard Walters <rwalters@qualcomm.com>
25 # Jean-Sebastien Guay <jean_seb@hybride.com>
26 # Frédéric Buclin <LpSolit@gmail.com>
28 # Run me out of cron at midnight to collect Bugzilla statistics.
30 # To run new charts for a specific date, pass it in on the command line in
31 # ISO (2004-08-14) format.
41 use Bugzilla
::Constants
;
46 use Bugzilla
::Product
;
49 # Turn off output buffering (probably needed when displaying output feedback
50 # in the regenerate mode).
53 # Tidy up after graphing module
54 my $cwd = Cwd
::getcwd
();
55 if (chdir("graphs")) {
58 # chdir("..") doesn't work if graphs is a symlink, see bug 429378
62 # This is a pure command line script.
63 Bugzilla
->usage_mode(USAGE_MODE_CMDLINE
);
65 my $dbh = Bugzilla
->switch_to_shadow_db();
68 # To recreate the daily statistics, run "collectstats.pl --regenerate" .
70 if ($#ARGV >= 0 && $ARGV[0] eq "--regenerate") {
75 my $datadir = bz_locations
()->{'datadir'};
77 my @myproducts = map {$_->name} Bugzilla
::Product
->get_all;
78 unshift(@myproducts, "-All-");
80 # As we can now customize statuses and resolutions, looking at the current list
81 # of legal values only is not enough as some now removed statuses and resolutions
82 # may have existed in the past, or have been renamed. We want them all.
84 foreach my $field ('bug_status', 'resolution') {
85 my $values = get_legal_field_values
($field);
86 my $old_values = $dbh->selectcol_arrayref(
87 "SELECT bugs_activity.added
90 ON fielddefs.id = bugs_activity.fieldid
92 ON $field.value = bugs_activity.added
93 WHERE fielddefs.name = ?
98 SELECT bugs_activity.removed
101 ON fielddefs.id = bugs_activity.fieldid
103 ON $field.value = bugs_activity.removed
104 WHERE fielddefs.name = ?
105 AND $field.id IS NULL",
106 undef, ($field, $field));
108 push(@
$values, @
$old_values);
109 $fields->{$field} = $values;
112 my @statuses = @
{$fields->{'bug_status'}};
113 my @resolutions = @
{$fields->{'resolution'}};
114 # Exclude "" from the resolution list.
115 @resolutions = grep {$_} @resolutions;
118 foreach (@myproducts) {
119 my $dir = "$datadir/mining";
121 &check_data_dir
($dir);
124 ®enerate_stats
($dir, $_);
126 &collect_stats
($dir, $_);
130 # Uncomment the following line for performance testing.
131 #print "Total time taken " . delta_time($tstart, $tend) . "\n";
138 local $ENV{'GATEWAY_INTERFACE'} = 'cmdline';
139 local $ENV{'REQUEST_METHOD'} = 'GET';
140 local $ENV{'QUERY_STRING'} = 'ctype=rdf';
145 # Generate a static RDF file containing the default view of the duplicates data.
146 open(CGI
, "$perl -T duplicates.cgi |")
147 || die "can't fork duplicates.cgi: $!";
148 open(RDF
, ">$datadir/duplicates.tmp")
149 || die "can't write to $datadir/duplicates.tmp: $!";
150 my $headers_done = 0;
152 print RDF
if $headers_done;
153 $headers_done = 1 if $_ eq "\r\n";
158 if (-s
"$datadir/duplicates.tmp") {
159 rename("$datadir/duplicates.rdf", "$datadir/duplicates-old.rdf");
160 rename("$datadir/duplicates.tmp", "$datadir/duplicates.rdf");
175 my $when = localtime (time);
176 my $dbh = Bugzilla
->dbh;
179 if ($product ne '-All-') {
180 my $prod = Bugzilla
::Product
::check_product
($product);
181 $product_id = $prod->id;
184 # NB: Need to mangle the product for the filename, but use the real
185 # product name in the query
186 my $file_product = $product;
187 $file_product =~ s/\//-/gs
;
188 my $file = join '/', $dir, $file_product;
189 my $exists = -f
$file;
191 # if the file exists, get the old status and resolution list for that product.
193 @data = get_old_data
($file) if $exists;
195 # If @data is not empty, then we have to recreate the data file.
197 open(DATA
, '>', $file)
198 || ThrowCodeError
('chart_file_open_fail', {'filename' => $file});
201 open(DATA
, '>>', $file)
202 || ThrowCodeError
('chart_file_open_fail', {'filename' => $file});
205 # Now collect current data.
207 my $status_sql = q{SELECT COUNT(*) FROM bugs WHERE bug_status = ?};
208 my $reso_sql = q{SELECT COUNT(*) FROM bugs WHERE resolution = ?};
210 if ($product ne '-All-') {
211 $status_sql .= q{ AND product_id = ?};
212 $reso_sql .= q{ AND product_id = ?};
215 my $sth_status = $dbh->prepare($status_sql);
216 my $sth_reso = $dbh->prepare($reso_sql);
219 foreach my $status (@statuses) {
221 push (@values, $product_id) if ($product ne '-All-');
222 my $count = $dbh->selectrow_array($sth_status, undef, @values);
225 foreach my $resolution (@resolutions) {
226 @values = ($resolution);
227 push (@values, $product_id) if ($product ne '-All-');
228 my $count = $dbh->selectrow_array($sth_reso, undef, @values);
232 if (!$exists || scalar(@data)) {
233 my $fields = join('|', ('DATE', @statuses, @resolutions));
235 # Bugzilla Daily Bug Stats
237 # Do not edit me! This file is generated.
245 # Add existing data, if needed. Note that no count is not treated
246 # the same way as a count with 0 bug.
247 foreach my $data (@data) {
248 print DATA
join('|', map {defined $data->{$_} ?
$data->{$_} : ''}
249 ('DATE', @statuses, @resolutions)) . "\n";
251 print DATA
(join '|', @row) . "\n";
259 open(DATA
, '<', $file)
260 || ThrowCodeError
('chart_file_open_fail', {'filename' => $file});
268 if (/^# fields?:\s*(.+)\s*$/) {
269 @columns = split(/\|/, $1);
270 # Compare this list with @statuses and @resolutions.
271 # If they are identical, then we can safely append new data
272 # to the end of the file; else we have to recreate it.
274 my @new_cols = ($columns[0], @statuses, @resolutions);
275 if (scalar(@columns) == scalar(@new_cols)) {
277 for (0 .. $#columns) {
278 $identical = 0 if ($columns[$_] ne $new_cols[$_]);
283 next unless $recreate;
284 next if (/^#/); # Ignore comments.
285 # If we have to recreate the file, we have to load all existing
287 my @line = split /\|/;
289 foreach my $column (@columns) {
290 $data{$column} = shift @line;
298 sub calculate_dupes
{
299 my $dbh = Bugzilla
->dbh;
300 my $rows = $dbh->selectall_arrayref("SELECT dupe_of, dupe FROM duplicates");
307 my $today = &today_dash
;
309 # Save % count here in a date-named file
310 # so we can read it back in to do changed counters
311 # First, delete it if it exists, so we don't add to the contents of an old file
312 my $datadir = bz_locations
()->{'datadir'};
314 if (my @files = <$datadir/duplicates/dupes
$today*>) {
315 map { trick_taint
($_) } @files;
319 dbmopen(%count, "$datadir/duplicates/dupes$today", 0644) || die "Can't open DBM dupes file: $!";
321 # Create a hash with key "a bug number", value "bug which that bug is a
322 # direct dupe of" - straight from the duplicates table.
323 foreach my $row (@
$rows) {
324 my ($dupe_of, $dupe) = @
$row;
325 $dupes{$dupe} = $dupe_of;
328 # Total up the number of bugs which are dupes of a given bug
329 # count will then have key = "bug number",
330 # value = "number of immediate dupes of that bug".
331 foreach $key (keys(%dupes))
333 my $dupe_of = $dupes{$key};
335 if (!defined($count{$dupe_of})) {
336 $count{$dupe_of} = 0;
342 # Now we collapse the dupe tree by iterating over %count until
343 # there is no further change.
344 while ($changed == 1)
347 foreach $key (keys(%count)) {
348 # if this bug is actually itself a dupe, and has a count...
349 if (defined($dupes{$key}) && $count{$key} > 0) {
350 # add that count onto the bug it is a dupe of,
351 # and zero the count; the check is to avoid
353 if ($count{$dupes{$key}} != 0) {
354 $count{$dupes{$key}} += $count{$key};
362 # Remove the values for which the count is zero
363 foreach $key (keys(%count))
365 if ($count{$key} == 0) {
373 # This regenerates all statistics from the database.
374 sub regenerate_stats
{
378 my $dbh = Bugzilla
->dbh;
379 my $when = localtime(time());
382 # NB: Need to mangle the product for the filename, but use the real
383 # product name in the query
384 my $file_product = $product;
385 $file_product =~ s/\//-/gs
;
386 my $file = join '/', $dir, $file_product;
390 my $and_product = "";
391 my $from_product = "";
394 if ($product ne '-All-') {
395 $and_product = q{ AND products.name = ?};
396 $from_product = q{ INNER JOIN products
397 ON bugs.product_id = products.id};
398 push (@values, $product);
401 # Determine the start date from the date the first bug in the
402 # database was created, and the end date from the current day.
403 # If there were no bugs in the search, return early.
404 my $query = q{SELECT } .
405 $dbh->sql_to_days('creation_ts') . q{ AS start_day, } .
406 $dbh->sql_to_days('current_date') . q{ AS end_day, } .
407 $dbh->sql_to_days("'1970-01-01'") .
408 qq{ FROM bugs
$from_product
409 WHERE
} . $dbh->sql_to_days('creation_ts') .
410 qq{ IS NOT NULL
$and_product
411 ORDER BY start_day
} . $dbh->sql_limit(1);
412 my ($start, $end, $base) = $dbh->selectrow_array($query, undef, @values);
414 if (!defined $start) {
418 if (open DATA
, ">$file") {
420 my $fields = join('|', ('DATE', @statuses, @resolutions));
422 # Bugzilla Daily Bug Stats
424 # Do not edit me! This file is generated.
430 # For each day, generate a line of statistics.
431 my $total_days = $end - $start;
432 for (my $day = $start + 1; $day <= $end; $day++) {
433 # Some output feedback
434 my $percent_done = ($day - $start - 1) * 100 / $total_days;
435 printf "\rRegenerating $product \[\%.1f\%\%]", $percent_done;
437 # Get a list of bugs that were created the previous day, and
438 # add those bugs to the list of bugs for this product.
439 $query = qq{SELECT bug_id
440 FROM bugs
$from_product
441 WHERE bugs
.creation_ts
< } .
442 $dbh->sql_from_days($day - 1) .
443 q{ AND bugs.creation_ts >= } .
444 $dbh->sql_from_days($day - 2) .
445 $and_product . q{ ORDER BY bug_id};
447 my $bug_ids = $dbh->selectcol_arrayref($query, undef, @values);
449 push(@bugs, @
$bug_ids);
451 # For each bug that existed on that day, determine its status
452 # at the beginning of the day. If there were no status
453 # changes on or after that day, the status was the same as it
454 # is today, which can be found in the bugs table. Otherwise,
455 # the status was equal to the first "previous value" entry in
456 # the bugs_activity table for that bug made on or after that
459 foreach (@statuses) { $bugcount{$_} = 0; }
460 foreach (@resolutions) { $bugcount{$_} = 0; }
461 # Get information on bug states and resolutions.
462 $query = qq{SELECT bugs_activity
.removed
465 ON bugs_activity
.fieldid
= fielddefs
.id
466 WHERE fielddefs
.name
= ?
467 AND bugs_activity
.bug_id
= ?
468 AND bugs_activity
.bug_when
>= } .
469 $dbh->sql_from_days($day) .
470 " ORDER BY bugs_activity.bug_when " .
473 my $sth_bug = $dbh->prepare($query);
474 my $sth_status = $dbh->prepare(q{SELECT bug_status
478 my $sth_reso = $dbh->prepare(q{SELECT resolution
482 for my $bug (@bugs) {
483 my $status = $dbh->selectrow_array($sth_bug, undef,
486 $status = $dbh->selectrow_array($sth_status, undef, $bug);
489 if (defined $bugcount{$status}) {
490 $bugcount{$status}++;
492 my $resolution = $dbh->selectrow_array($sth_bug, undef,
494 unless ($resolution) {
495 $resolution = $dbh->selectrow_array($sth_reso, undef, $bug);
498 if (defined $bugcount{$resolution}) {
499 $bugcount{$resolution}++;
503 # Generate a line of output containing the date and counts
504 # of bugs in each state.
505 my $date = sqlday
($day, $base);
507 foreach (@statuses) { print DATA
"|$bugcount{$_}"; }
508 foreach (@resolutions) { print DATA
"|$bugcount{$_}"; }
512 # Finish up output feedback for this product.
514 print "\rRegenerating $product \[100.0\%] - " .
515 delta_time
($tstart, $tend) . "\n";
523 my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
524 return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
528 my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
529 return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom;
533 my ($day, $base) = @_;
534 $day = ($day - $base) * 86400;
535 my ($dom, $mon, $year) = (gmtime($day))[3, 4, 5];
536 return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
542 my $delta = $tend - $tstart;
543 my $hours = int($delta/3600);
544 my $minutes = int($delta/60) - ($hours * 60);
545 my $seconds = $delta - ($minutes * 60) - ($hours * 3600);
546 return sprintf("%02d:%02d:%02d" , $hours, $minutes, $seconds);
549 sub CollectSeriesData
{
550 # We need some way of randomising the distribution of series, such that
551 # all of the series which are to be run every 7 days don't run on the same
552 # day. This is because this might put the server under severe load if a
553 # particular frequency, such as once a week, is very common. We achieve
554 # this by only running queries when:
555 # (days_since_epoch + series_id) % frequency = 0. So they'll run every
556 # <frequency> days, but the start date depends on the series_id.
557 my $days_since_epoch = int(time() / (60 * 60 * 24));
558 my $today = $ARGV[0] || today_dash
();
560 # We save a copy of the main $dbh and then switch to the shadow and get
561 # that one too. Remember, these may be the same.
562 my $dbh = Bugzilla
->switch_to_main_db();
563 my $shadow_dbh = Bugzilla
->switch_to_shadow_db();
565 my $serieses = $dbh->selectall_hashref("SELECT series_id, query, creator " .
567 "WHERE frequency != 0 AND " .
568 "MOD(($days_since_epoch + series_id), frequency) = 0",
571 # We prepare the insertion into the data table, for efficiency.
572 my $sth = $dbh->prepare("INSERT INTO series_data " .
573 "(series_id, series_date, series_value) " .
574 "VALUES (?, " . $dbh->quote($today) . ", ?)");
576 # We delete from the table beforehand, to avoid SQL errors if people run
577 # collectstats.pl twice on the same day.
578 my $deletesth = $dbh->prepare("DELETE FROM series_data
579 WHERE series_id = ? AND series_date = " .
580 $dbh->quote($today));
582 foreach my $series_id (keys %$serieses) {
583 # We set up the user for Search.pm's permission checking - each series
584 # runs with the permissions of its creator.
585 my $user = new Bugzilla
::User
($serieses->{$series_id}->{'creator'});
586 my $cgi = new Bugzilla
::CGI
($serieses->{$series_id}->{'query'});
589 # Do not die if Search->new() detects invalid data, such as an obsolete
590 # login name or a renamed product or component, etc.
592 my $search = new Bugzilla
::Search
('params' => $cgi,
593 'fields' => ["bugs.bug_id"],
595 my $sql = $search->getSQL();
596 $data = $shadow_dbh->selectall_arrayref($sql);
600 # We need to count the returned rows. Without subselects, we can't
601 # do this directly in the SQL for all queries. So we do it by hand.
602 my $count = scalar(@
$data) || 0;
604 $deletesth->execute($series_id);
605 $sth->execute($series_id, $count);