BugsSite/collectstats.pl

   1 #!/usr/bin/env perl -w
   2 # -*- Mode: perl; indent-tabs-mode: nil -*-
   3 #
   4 # The contents of this file are subject to the Mozilla Public
   5 # License Version 1.1 (the "License"); you may not use this file
   6 # except in compliance with the License. You may obtain a copy of
   7 # the License at http://www.mozilla.org/MPL/
   8 #
   9 # Software distributed under the License is distributed on an "AS
  10 # IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
  11 # implied. See the License for the specific language governing
  12 # rights and limitations under the License.
  13 #
  14 # The Original Code is the Bugzilla Bug Tracking System.
  15 #
  16 # The Initial Developer of the Original Code is Netscape Communications
  17 # Corporation. Portions created by Netscape are
  18 # Copyright (C) 1998 Netscape Communications Corporation. All
  19 # Rights Reserved.
  20 #
  21 # Contributor(s): Terry Weissman <terry@mozilla.org>,
  22 #                 Harrison Page <harrison@netscape.com>
  23 #                 Gervase Markham <gerv@gerv.net>
  24 #                 Richard Walters <rwalters@qualcomm.com>
  25 #                 Jean-Sebastien Guay <jean_seb@hybride.com>
  26 #                 Frédéric Buclin <LpSolit@gmail.com>
  27
  28 # Run me out of cron at midnight to collect Bugzilla statistics.
  29 #
  30 # To run new charts for a specific date, pass it in on the command line in
  31 # ISO (2004-08-14) format.
  32
  33 use AnyDBM_File;
  34 use strict;
  35 use IO::Handle;
  36 use Cwd;
  37
  38 use lib qw(. lib);
  39
  40 use Bugzilla;
  41 use Bugzilla::Constants;
  42 use Bugzilla::Error;
  43 use Bugzilla::Util;
  44 use Bugzilla::Search;
  45 use Bugzilla::User;
  46 use Bugzilla::Product;
  47 use Bugzilla::Field;
  48
  49 # Turn off output buffering (probably needed when displaying output feedback
  50 # in the regenerate mode).
  51 $| = 1;
  52
  53 # Tidy up after graphing module
  54 my $cwd = Cwd::getcwd();
  55 if (chdir("graphs")) {
  56     unlink <./*.gif>;
  57     unlink <./*.png>;
  58     # chdir("..") doesn't work if graphs is a symlink, see bug 429378
  59     chdir($cwd);
  60 }
  61
  62 # This is a pure command line script.
  63 Bugzilla->usage_mode(USAGE_MODE_CMDLINE);
  64
  65 my $dbh = Bugzilla->switch_to_shadow_db();
  66
  67
  68 # To recreate the daily statistics,  run "collectstats.pl --regenerate" .
  69 my $regenerate = 0;
  70 if ($#ARGV >= 0 && $ARGV[0] eq "--regenerate") {
  71     shift(@ARGV);
  72     $regenerate = 1;
  73 }
  74
  75 my $datadir = bz_locations()->{'datadir'};
  76
  77 my @myproducts = map {$_->name} Bugzilla::Product->get_all;
  78 unshift(@myproducts, "-All-");
  79
  80 # As we can now customize statuses and resolutions, looking at the current list
  81 # of legal values only is not enough as some now removed statuses and resolutions
  82 # may have existed in the past, or have been renamed. We want them all.
  83 my $fields = {};
  84 foreach my $field ('bug_status', 'resolution') {
  85     my $values = get_legal_field_values($field);
  86     my $old_values = $dbh->selectcol_arrayref(
  87                              "SELECT bugs_activity.added
  88                                 FROM bugs_activity
  89                           INNER JOIN fielddefs
  90                                   ON fielddefs.id = bugs_activity.fieldid
  91                            LEFT JOIN $field
  92                                   ON $field.value = bugs_activity.added
  93                                WHERE fielddefs.name = ?
  94                                  AND $field.id IS NULL
  95
  96                                UNION
  97
  98                               SELECT bugs_activity.removed
  99                                 FROM bugs_activity
 100                           INNER JOIN fielddefs
 101                                   ON fielddefs.id = bugs_activity.fieldid
 102                            LEFT JOIN $field
 103                                   ON $field.value = bugs_activity.removed
 104                                WHERE fielddefs.name = ?
 105                                  AND $field.id IS NULL",
 106                                undef, ($field, $field));
 107
 108     push(@$values, @$old_values);
 109     $fields->{$field} = $values;
 110 }
 111
 112 my @statuses = @{$fields->{'bug_status'}};
 113 my @resolutions = @{$fields->{'resolution'}};
 114 # Exclude "" from the resolution list.
 115 @resolutions = grep {$_} @resolutions;
 116
 117 my $tstart = time;
 118 foreach (@myproducts) {
 119     my $dir = "$datadir/mining";
 120
 121     &check_data_dir ($dir);
 122
 123     if ($regenerate) {
 124         &regenerate_stats($dir, $_);
 125     } else {
 126         &collect_stats($dir, $_);
 127     }
 128 }
 129 my $tend = time;
 130 # Uncomment the following line for performance testing.
 131 #print "Total time taken " . delta_time($tstart, $tend) . "\n";
 132
 133 &calculate_dupes();
 134
 135 CollectSeriesData();
 136
 137 {
 138     local $ENV{'GATEWAY_INTERFACE'} = 'cmdline';
 139     local $ENV{'REQUEST_METHOD'} = 'GET';
 140     local $ENV{'QUERY_STRING'} = 'ctype=rdf';
 141
 142     my $perl = $^X;
 143     trick_taint($perl);
 144
 145     # Generate a static RDF file containing the default view of the duplicates data.
 146     open(CGI, "$perl -T duplicates.cgi |")
 147         || die "can't fork duplicates.cgi: $!";
 148     open(RDF, ">$datadir/duplicates.tmp")
 149         || die "can't write to $datadir/duplicates.tmp: $!";
 150     my $headers_done = 0;
 151     while (<CGI>) {
 152         print RDF if $headers_done;
 153         $headers_done = 1 if $_ eq "\r\n";
 154     }
 155     close CGI;
 156     close RDF;
 157 }
 158 if (-s "$datadir/duplicates.tmp") {
 159     rename("$datadir/duplicates.rdf", "$datadir/duplicates-old.rdf");
 160     rename("$datadir/duplicates.tmp", "$datadir/duplicates.rdf");
 161 }
 162
 163 sub check_data_dir {
 164     my $dir = shift;
 165
 166     if (! -d $dir) {
 167         mkdir $dir, 0755;
 168         chmod 0755, $dir;
 169     }
 170 }
 171
 172 sub collect_stats {
 173     my $dir = shift;
 174     my $product = shift;
 175     my $when = localtime (time);
 176     my $dbh = Bugzilla->dbh;
 177
 178     my $product_id;
 179     if ($product ne '-All-') {
 180         my $prod = Bugzilla::Product::check_product($product);
 181         $product_id = $prod->id;
 182     }
 183
 184     # NB: Need to mangle the product for the filename, but use the real
 185     # product name in the query
 186     my $file_product = $product;
 187     $file_product =~ s/\//-/gs;
 188     my $file = join '/', $dir, $file_product;
 189     my $exists = -f $file;
 190
 191     # if the file exists, get the old status and resolution list for that product.
 192     my @data;
 193     @data = get_old_data($file) if $exists;
 194
 195     # If @data is not empty, then we have to recreate the data file.
 196     if (scalar(@data)) {
 197         open(DATA, '>', $file)
 198           || ThrowCodeError('chart_file_open_fail', {'filename' => $file});
 199     }
 200     else {
 201         open(DATA, '>>', $file)
 202           || ThrowCodeError('chart_file_open_fail', {'filename' => $file});
 203     }
 204
 205     # Now collect current data.
 206     my @row = (today());
 207     my $status_sql = q{SELECT COUNT(*) FROM bugs WHERE bug_status = ?};
 208     my $reso_sql   = q{SELECT COUNT(*) FROM bugs WHERE resolution = ?};
 209
 210     if ($product ne '-All-') {
 211         $status_sql .= q{ AND product_id = ?};
 212         $reso_sql   .= q{ AND product_id = ?};
 213     }
 214
 215     my $sth_status = $dbh->prepare($status_sql);
 216     my $sth_reso   = $dbh->prepare($reso_sql);
 217
 218     my @values ;
 219     foreach my $status (@statuses) {
 220         @values = ($status);
 221         push (@values, $product_id) if ($product ne '-All-');
 222         my $count = $dbh->selectrow_array($sth_status, undef, @values);
 223         push(@row, $count);
 224     }
 225     foreach my $resolution (@resolutions) {
 226         @values = ($resolution);
 227         push (@values, $product_id) if ($product ne '-All-');
 228         my $count = $dbh->selectrow_array($sth_reso, undef, @values);
 229         push(@row, $count);
 230     }
 231
 232     if (!$exists || scalar(@data)) {
 233         my $fields = join('|', ('DATE', @statuses, @resolutions));
 234         print DATA <<FIN;
 235 # Bugzilla Daily Bug Stats
 236 #
 237 # Do not edit me! This file is generated.
 238 #
 239 # fields: $fields
 240 # Product: $product
 241 # Created: $when
 242 FIN
 243     }
 244
 245     # Add existing data, if needed. Note that no count is not treated
 246     # the same way as a count with 0 bug.
 247     foreach my $data (@data) {
 248         print DATA join('|', map {defined $data->{$_} ? $data->{$_} : ''}
 249                                  ('DATE', @statuses, @resolutions)) . "\n";
 250     }
 251     print DATA (join '|', @row) . "\n";
 252     close DATA;
 253     chmod 0644, $file;
 254 }
 255
 256 sub get_old_data {
 257     my $file = shift;
 258
 259     open(DATA, '<', $file)
 260       || ThrowCodeError('chart_file_open_fail', {'filename' => $file});
 261
 262     my @data;
 263     my @columns;
 264     my $recreate = 0;
 265     while (<DATA>) {
 266         chomp;
 267         next unless $_;
 268         if (/^# fields?:\s*(.+)\s*$/) {
 269             @columns = split(/\|/, $1);
 270             # Compare this list with @statuses and @resolutions.
 271             # If they are identical, then we can safely append new data
 272             # to the end of the file; else we have to recreate it.
 273             $recreate = 1;
 274             my @new_cols = ($columns[0], @statuses, @resolutions);
 275             if (scalar(@columns) == scalar(@new_cols)) {
 276                 my $identical = 1;
 277                 for (0 .. $#columns) {
 278                     $identical = 0 if ($columns[$_] ne $new_cols[$_]);
 279                 }
 280                 last if $identical;
 281             }
 282         }
 283         next unless $recreate;
 284         next if (/^#/); # Ignore comments.
 285         # If we have to recreate the file, we have to load all existing
 286         # data first.
 287         my @line = split /\|/;
 288         my %data;
 289         foreach my $column (@columns) {
 290             $data{$column} = shift @line;
 291         }
 292         push(@data, \%data);
 293     }
 294     close(DATA);
 295     return @data;
 296 }
 297
 298 sub calculate_dupes {
 299     my $dbh = Bugzilla->dbh;
 300     my $rows = $dbh->selectall_arrayref("SELECT dupe_of, dupe FROM duplicates");
 301
 302     my %dupes;
 303     my %count;
 304     my $key;
 305     my $changed = 1;
 306
 307     my $today = &today_dash;
 308
 309     # Save % count here in a date-named file
 310     # so we can read it back in to do changed counters
 311     # First, delete it if it exists, so we don't add to the contents of an old file
 312     my $datadir = bz_locations()->{'datadir'};
 313
 314     if (my @files = <$datadir/duplicates/dupes$today*>) {
 315         map { trick_taint($_) } @files;
 316         unlink @files;
 317     }
 318
 319     dbmopen(%count, "$datadir/duplicates/dupes$today", 0644) || die "Can't open DBM dupes file: $!";
 320
 321     # Create a hash with key "a bug number", value "bug which that bug is a
 322     # direct dupe of" - straight from the duplicates table.
 323     foreach my $row (@$rows) {
 324         my ($dupe_of, $dupe) = @$row;
 325         $dupes{$dupe} = $dupe_of;
 326     }
 327
 328     # Total up the number of bugs which are dupes of a given bug
 329     # count will then have key = "bug number",
 330     # value = "number of immediate dupes of that bug".
 331     foreach $key (keys(%dupes))
 332     {
 333         my $dupe_of = $dupes{$key};
 334
 335         if (!defined($count{$dupe_of})) {
 336             $count{$dupe_of} = 0;
 337         }
 338
 339         $count{$dupe_of}++;
 340     }
 341
 342     # Now we collapse the dupe tree by iterating over %count until
 343     # there is no further change.
 344     while ($changed == 1)
 345     {
 346         $changed = 0;
 347         foreach $key (keys(%count)) {
 348             # if this bug is actually itself a dupe, and has a count...
 349             if (defined($dupes{$key}) && $count{$key} > 0) {
 350                 # add that count onto the bug it is a dupe of,
 351                 # and zero the count; the check is to avoid
 352                 # loops
 353                 if ($count{$dupes{$key}} != 0) {
 354                     $count{$dupes{$key}} += $count{$key};
 355                     $count{$key} = 0;
 356                     $changed = 1;
 357                 }
 358             }
 359         }
 360     }
 361
 362     # Remove the values for which the count is zero
 363     foreach $key (keys(%count))
 364     {
 365         if ($count{$key} == 0) {
 366             delete $count{$key};
 367         }
 368     }
 369
 370     dbmclose(%count);
 371 }
 372
 373 # This regenerates all statistics from the database.
 374 sub regenerate_stats {
 375     my $dir = shift;
 376     my $product = shift;
 377
 378     my $dbh = Bugzilla->dbh;
 379     my $when = localtime(time());
 380     my $tstart = time();
 381
 382     # NB: Need to mangle the product for the filename, but use the real
 383     # product name in the query
 384     my $file_product = $product;
 385     $file_product =~ s/\//-/gs;
 386     my $file = join '/', $dir, $file_product;
 387
 388     my @bugs;
 389
 390     my $and_product = "";
 391     my $from_product = "";
 392
 393     my @values = ();
 394     if ($product ne '-All-') {
 395         $and_product = q{ AND products.name = ?};
 396         $from_product = q{ INNER JOIN products
 397                           ON bugs.product_id = products.id};
 398         push (@values, $product);
 399     }
 400
 401     # Determine the start date from the date the first bug in the
 402     # database was created, and the end date from the current day.
 403     # If there were no bugs in the search, return early.
 404     my $query = q{SELECT } .
 405                 $dbh->sql_to_days('creation_ts') . q{ AS start_day, } .
 406                 $dbh->sql_to_days('current_date') . q{ AS end_day, } .
 407                 $dbh->sql_to_days("'1970-01-01'") .
 408                  qq{ FROM bugs $from_product
 409                    WHERE } . $dbh->sql_to_days('creation_ts') .
 410                          qq{ IS NOT NULL $and_product
 411                 ORDER BY start_day } . $dbh->sql_limit(1);
 412     my ($start, $end, $base) = $dbh->selectrow_array($query, undef, @values);
 413
 414     if (!defined $start) {
 415         return;
 416     }
 417
 418     if (open DATA, ">$file") {
 419         DATA->autoflush(1);
 420         my $fields = join('|', ('DATE', @statuses, @resolutions));
 421         print DATA <<FIN;
 422 # Bugzilla Daily Bug Stats
 423 #
 424 # Do not edit me! This file is generated.
 425 #
 426 # fields: $fields
 427 # Product: $product
 428 # Created: $when
 429 FIN
 430         # For each day, generate a line of statistics.
 431         my $total_days = $end - $start;
 432         for (my $day = $start + 1; $day <= $end; $day++) {
 433             # Some output feedback
 434             my $percent_done = ($day - $start - 1) * 100 / $total_days;
 435             printf "\rRegenerating $product \[\%.1f\%\%]", $percent_done;
 436
 437             # Get a list of bugs that were created the previous day, and
 438             # add those bugs to the list of bugs for this product.
 439             $query = qq{SELECT bug_id
 440                           FROM bugs $from_product
 441                          WHERE bugs.creation_ts < } .
 442                          $dbh->sql_from_days($day - 1) .
 443                          q{ AND bugs.creation_ts >= } .
 444                          $dbh->sql_from_days($day - 2) .
 445                         $and_product . q{ ORDER BY bug_id};
 446
 447             my $bug_ids = $dbh->selectcol_arrayref($query, undef, @values);
 448
 449             push(@bugs, @$bug_ids);
 450
 451             # For each bug that existed on that day, determine its status
 452             # at the beginning of the day.  If there were no status
 453             # changes on or after that day, the status was the same as it
 454             # is today, which can be found in the bugs table.  Otherwise,
 455             # the status was equal to the first "previous value" entry in
 456             # the bugs_activity table for that bug made on or after that
 457             # day.
 458             my %bugcount;
 459             foreach (@statuses) { $bugcount{$_} = 0; }
 460             foreach (@resolutions) { $bugcount{$_} = 0; }
 461             # Get information on bug states and resolutions.
 462             $query = qq{SELECT bugs_activity.removed
 463                           FROM bugs_activity
 464                     INNER JOIN fielddefs
 465                             ON bugs_activity.fieldid = fielddefs.id
 466                          WHERE fielddefs.name = ?
 467                            AND bugs_activity.bug_id = ?
 468                            AND bugs_activity.bug_when >= } .
 469                            $dbh->sql_from_days($day) .
 470                     " ORDER BY bugs_activity.bug_when " .
 471                           $dbh->sql_limit(1);
 472
 473             my $sth_bug = $dbh->prepare($query);
 474             my $sth_status = $dbh->prepare(q{SELECT bug_status
 475                                                FROM bugs
 476                                               WHERE bug_id = ?});
 477
 478             my $sth_reso = $dbh->prepare(q{SELECT resolution
 479                                              FROM bugs
 480                                             WHERE bug_id = ?});
 481
 482             for my $bug (@bugs) {
 483                 my $status = $dbh->selectrow_array($sth_bug, undef,
 484                                                        'bug_status', $bug);
 485                 unless ($status) {
 486                     $status = $dbh->selectrow_array($sth_status, undef, $bug);
 487                 }
 488
 489                 if (defined $bugcount{$status}) {
 490                     $bugcount{$status}++;
 491                 }
 492                 my $resolution = $dbh->selectrow_array($sth_bug, undef,
 493                                                          'resolution', $bug);
 494                 unless ($resolution) {
 495                     $resolution = $dbh->selectrow_array($sth_reso, undef, $bug);
 496                 }
 497
 498                 if (defined $bugcount{$resolution}) {
 499                     $bugcount{$resolution}++;
 500                 }
 501             }
 502
 503             # Generate a line of output containing the date and counts
 504             # of bugs in each state.
 505             my $date = sqlday($day, $base);
 506             print DATA "$date";
 507             foreach (@statuses) { print DATA "|$bugcount{$_}"; }
 508             foreach (@resolutions) { print DATA "|$bugcount{$_}"; }
 509             print DATA "\n";
 510         }
 511
 512         # Finish up output feedback for this product.
 513         my $tend = time;
 514         print "\rRegenerating $product \[100.0\%] - " .
 515             delta_time($tstart, $tend) . "\n";
 516
 517         close DATA;
 518         chmod 0640, $file;
 519     }
 520 }
 521
 522 sub today {
 523     my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
 524     return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
 525 }
 526
 527 sub today_dash {
 528     my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
 529     return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom;
 530 }
 531
 532 sub sqlday {
 533     my ($day, $base) = @_;
 534     $day = ($day - $base) * 86400;
 535     my ($dom, $mon, $year) = (gmtime($day))[3, 4, 5];
 536     return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
 537 }
 538
 539 sub delta_time {
 540     my $tstart = shift;
 541     my $tend = shift;
 542     my $delta = $tend - $tstart;
 543     my $hours = int($delta/3600);
 544     my $minutes = int($delta/60) - ($hours * 60);
 545     my $seconds = $delta - ($minutes * 60) - ($hours * 3600);
 546     return sprintf("%02d:%02d:%02d" , $hours, $minutes, $seconds);
 547 }
 548
 549 sub CollectSeriesData {
 550     # We need some way of randomising the distribution of series, such that
 551     # all of the series which are to be run every 7 days don't run on the same
 552     # day. This is because this might put the server under severe load if a
 553     # particular frequency, such as once a week, is very common. We achieve
 554     # this by only running queries when:
 555     # (days_since_epoch + series_id) % frequency = 0. So they'll run every
 556     # <frequency> days, but the start date depends on the series_id.
 557     my $days_since_epoch = int(time() / (60 * 60 * 24));
 558     my $today = $ARGV[0] || today_dash();
 559
 560     # We save a copy of the main $dbh and then switch to the shadow and get
 561     # that one too. Remember, these may be the same.
 562     my $dbh = Bugzilla->switch_to_main_db();
 563     my $shadow_dbh = Bugzilla->switch_to_shadow_db();
 564
 565     my $serieses = $dbh->selectall_hashref("SELECT series_id, query, creator " .
 566                       "FROM series " .
 567                       "WHERE frequency != 0 AND " .
 568                       "MOD(($days_since_epoch + series_id), frequency) = 0",
 569                       "series_id");
 570
 571     # We prepare the insertion into the data table, for efficiency.
 572     my $sth = $dbh->prepare("INSERT INTO series_data " .
 573                             "(series_id, series_date, series_value) " .
 574                             "VALUES (?, " . $dbh->quote($today) . ", ?)");
 575
 576     # We delete from the table beforehand, to avoid SQL errors if people run
 577     # collectstats.pl twice on the same day.
 578     my $deletesth = $dbh->prepare("DELETE FROM series_data
 579                                    WHERE series_id = ? AND series_date = " .
 580                                    $dbh->quote($today));
 581
 582     foreach my $series_id (keys %$serieses) {
 583         # We set up the user for Search.pm's permission checking - each series
 584         # runs with the permissions of its creator.
 585         my $user = new Bugzilla::User($serieses->{$series_id}->{'creator'});
 586         my $cgi = new Bugzilla::CGI($serieses->{$series_id}->{'query'});
 587         my $data;
 588
 589         # Do not die if Search->new() detects invalid data, such as an obsolete
 590         # login name or a renamed product or component, etc.
 591         eval {
 592             my $search = new Bugzilla::Search('params' => $cgi,
 593                                               'fields' => ["bugs.bug_id"],
 594                                               'user'   => $user);
 595             my $sql = $search->getSQL();
 596             $data = $shadow_dbh->selectall_arrayref($sql);
 597         };
 598
 599         if (!$@) {
 600             # We need to count the returned rows. Without subselects, we can't
 601             # do this directly in the SQL for all queries. So we do it by hand.
 602             my $count = scalar(@$data) || 0;
 603
 604             $deletesth->execute($series_id);
 605             $sth->execute($series_id, $count);
 606         }
 607     }
 608 }
 609