Bio::DB::Universal: move into its own distribution
[bioperl-live.git] / maintenance / modules.pl
blob035be1f368bf715c5b326e374ecb12c02668f78f
1 #!/usr/bin/perl
3 =head1 NAME
5 modules.pl - information about modules in BioPerl core
7 =head1 SYNOPSIS
9 B<modules.pl> [B<-V|--verbose>] [B<-c|--count>] | [B<-l|--list>] |
10 [B<-u|--untested>] | [B<-i|--info> class] | [B<-i|--inherit> |
11 [B<-d|--dir> path ] | [B<-v|--version> | [B<-?|-h|--help>]
13 =head1 DESCRIPTION
15 This script counts, lists and provides other information about bioperl
16 modules. It is mainly meant to be run by bioperl maintainers.
18 The default action is to count modules in the bioperl core
19 distribution. Based on the class name it tries to classify them into
20 categories. The following is a tentative glossary of terms used.
23 =over 4
25 =item Base
27 Synonyms: Generic class, parameterized class, generic module.
29 A class that you don't instantiate in your scripts, but that it's a
30 template for other classes.
32 Examples: Bio::Tools::Run::WrapperBase - a base object for wrappers
33 around executables. Bio::Tools::Analysis::SimpleAnalysisBase - an
34 abstract superclass for SimpleAnalysis implementations
36 This are counted with C</:Base/ | /Base$/>; They have "Base" in the
37 beginning or end of the name.
39 =item Interface
41 Synonyms: protocol, feature set.
43 Class that defines a set of features that are common to a group of
44 classes.
46 Example: Bio::Tree::NodeI - interface describing a Tree Node.
48 This are counted with C</[^A-Z]I$/>; They have "I" at the end of the
49 name.
51 =item Component
53 A class that implements a small subset of their superclass. They are in
54 a directory with an identical name of the superclass. There are plenty
55 of them. You need only a small number of methods to be overridden.
57 Example: Bio::SeqIO::fasta.
59 This is counted with C</.*:[a-z]/>; Classes are inside their base directory
60 and all in lowercase.
62 =item Instance
64 The rest of them. It is sometimes helpful to divide them into two
65 types:
67 =over 2
69 =item Algorithmic classes
71 Example: Bio::AlignIO - Handler for AlignIO formats
73 =item Storage classes
75 Example: Bio::SimpleAlign - Multiple alignments held as a set of
76 sequences
78 =back
80 =back
82 =cut
85 # The helper class to store class status;
87 package BioClass;
89 sub new {
90 my $class = shift;
91 my $name = shift;
92 die unless $name;
94 my $self = {};
95 $self->{'name'} = $name;
96 $self->{'tested'} = 0;
97 $self->{'type'} = '';
98 $self->{'path'} = '';
100 bless $self, $class;
104 sub name {
105 my $self = shift;
106 return $self->{'name'};
109 sub tested {
110 my $self = shift;
111 my $value = shift;
112 $self->{'tested'} = 1 if defined $value && $value;
113 return $self->{'tested'} || 0;
116 sub type {
117 my $self = shift;
118 my $value = shift;
119 $self->{'type'} = $value if defined $value;
120 return $self->{'type'};
123 sub path {
124 my $self = shift;
125 my $value = shift;
126 $self->{'path'} = $value if defined $value;
127 return $self->{'path'};
130 sub add_superclass {
131 my $self = shift;
132 my $superclass = shift;
133 return unless $superclass;
134 $self->{'superclasses'}->{$superclass} = 1 ;
137 sub each_superclass {
138 my $self = shift;
139 return keys %{$self->{'superclasses'}};
142 sub add_used_class {
143 my $self = shift;
144 my $used_class = shift;
145 return unless $used_class;
146 $self->{'used_classes'}->{$used_class} = 1 ;
149 sub each_used_class {
150 my $self = shift;
151 return keys %{$self->{'used_classes'}};
154 package main;
156 use File::Find;
157 use Getopt::Long;
158 use Data::Dumper;
159 use strict;
162 # declare subroutines
163 sub dir;
164 sub modules;
165 sub count;
166 sub list_all;
167 sub untested;
168 sub info;
169 sub inherit;
170 sub synopsis;
171 sub version;
173 # command line options
174 my ($dir, $count,$list, $verbose,$info,$untested, $inherit, $synopsis,
175 $version);
176 GetOptions(
177 'dir:s' => \$dir,
178 'count' => \$count,
179 'list' => \$list,
180 'test_BioClass' => \&_test_BioClass,
181 'V|verbose' => \$verbose,
182 'untested' => \$untested,
183 'info:s' => \$info,
184 'inherit' => \$inherit,
185 'synopsis' => \$synopsis,
186 'version' => \$version,
187 'h|help|?' => sub{ exec('perldoc',$0); exit(0) }
191 our %MODULES; # storage structure
193 # find modules
194 my $pwd = $ENV{PWD};
195 my $seachdir = "$pwd/../Bio"; #default
196 my %FIND_OPTIONS = ( wanted => \&modules );
198 $seachdir = "$pwd/$dir" if $dir;
199 find \%FIND_OPTIONS, $seachdir;
202 # call subroutines
203 if ($list) { list_all }
204 elsif ($untested) { untested }
205 elsif ($info) { info($info) }
206 elsif ($inherit) { inherit }
207 elsif ($synopsis) { synopsis }
208 elsif ($version) { version }
209 else { count }
212 ################# end main ####################
216 # subroutines;
219 sub _test_BioClass {
220 $a = new BioClass('Bio::Test');
221 print "Class name: ", $a->name(), "\n";
222 $a->add_superclass('Bio::Super');
223 $a->add_superclass('Bio::Super2');
224 $a->tested(1);
225 $a->type('instance');
226 print Dumper [$a->each_superclass] if $a->tested;
227 print Dumper $a;
228 exit;
231 sub modules {
232 return unless /\.pm$/ ;
233 #return unless -e $_;
234 #print "file: $_\n" if $verbose;
235 open my $F, '<', $_ or warn "Could not read file '$_': $!\n" && return;
236 my $class;
237 while (<$F>) {
238 if (/^package\s+([\w:]+)\s*;/) {
239 #print $1, "\n" if $verbose;
240 $_ = $1;
241 $class = new BioClass($_);
242 $MODULES{$_} = $class;
243 if (/.*:[a-z]/) {
244 $class->type('component');
245 } elsif (/:Base/ | /Base$/) {
246 $class->type('base');
247 } elsif (/[^A-Z]I$/) {
248 $class->type('interface');
249 } else {
250 $class->type('instance');
252 $class->path($File::Find::name);
254 if (/^\w*use/ && /(Bio[\w:]+)\W*;/ && not /base/) {
255 next unless $class;
256 #print "\t$1\n" if $verbose;
257 $class->add_used_class($1);
259 if ((/\@ISA/ || /use base/) && /Bio/) {
260 next unless $class;
261 my $line = $_;
262 while ( $line =~ /(Bio[\w:]+)/g) {
263 #print "\t$1\n" if $verbose;
264 $class->add_superclass($1);
267 if (/\@ISA/ && /Bio/) {
268 next unless $class;
269 my $line = $_;
270 while ( $line =~ /(Bio[\w:]+)/g) {
271 #print "\t$1\n" if $verbose;
272 $class->add_superclass($1);
276 close $F;
279 =head1 OPTIONS
281 Only one option is processed on each run of the script. The --verbose
282 is an exception, it modifies the amount of output.
284 =over 4
286 =item B<-V | --verbose>
288 B<INACTIVE>
290 Set this option if you want to see more verbose output. Often that
291 will mean seeing warnings normally going into STDERR.
293 =cut
295 =item B<-d | --dir> path
297 Overides the default directories to check by one directory 'path' and
298 all its subdirectories.
300 =item B<-c | --count>
302 The default action if no other option is given. Gives the count of
303 modules broken to B<instance> ("usable"), B<base> ( (abstract)?
304 superclass) , B<interface> (the "I" files) and B<component> (used from
305 instantiable parent) modules, in addition to total number of modules.
307 Note that abstract superclass in bioperl is not an enforced concept and
308 they are not clearly indicateded in the class name.
310 =cut
312 sub count {
313 printf "Instance : %3d\n",
314 scalar (grep $MODULES{$_}->type =~ /instance/ , keys %MODULES);
315 printf "Base : %3d\n",
316 scalar (grep $MODULES{$_}->type =~ /base/ , keys %MODULES);
317 printf "Interface: %3d\n",
318 scalar (grep $MODULES{$_}->type =~ /interface/ , keys %MODULES);
319 printf "Component: %3d\n",
320 scalar (grep $MODULES{$_}->type =~ /component/ , keys %MODULES);
321 print "--------------\n";
322 printf "Total : %3d\n", scalar (keys %MODULES);
326 =item B<-l | --list>
328 Prints all the module names in alphabetical order. The output is a tab
329 separated list of category (see above) and module name per line. The
330 output can be processed with standard UNIX command line tools.
332 =cut
334 sub list_all {
335 foreach ( sort keys %MODULES) {
336 print $MODULES{$_}->type. "\t$_\n";
340 =item B<-u | --untested>
342 Prints a list of instance modules which are I<not> explicitly used by
343 test files in the directory. Superclasess or any classes used by others
344 are not reported, either, since their methods are assumed to be tested
345 by subclass tests.
348 =cut
350 sub _used_and_super {
351 my $name = shift;
352 # print "-:$name\n" if /Locati/;
353 foreach ($MODULES{$name}->each_superclass) {
354 next unless defined $MODULES{$_};
355 # print "-^$_\n" if /Locati/;
356 # unless (defined $MODULES{$_} or $MODULES{$_}->tested) {
357 if (not $MODULES{$_}->tested) {
358 $MODULES{$_}->tested(1);
359 _used_and_super($_);
362 foreach ($MODULES{$name}->each_used_class) {
363 next unless defined $MODULES{$_};
364 # print "--$_\n" if /Locati/;
365 # unless (defined $MODULES{$_} or $MODULES{$_}->tested) {
366 if (not $MODULES{$_}->tested) {
367 $MODULES{$_}->tested(1);
368 _used_and_super($_);
370 # $MODULES{$_}->tested(1) && _used_and_super($_)
371 # unless defined $MODULES{$_} or $MODULES{$_}->tested;
373 return 1;
376 sub untested {
377 foreach (`find ../t -name "*.t" -print | xargs grep -hs "[ur][se][eq]"`) {
378 s/.*use +//;
379 s/.*require +//;
380 next unless /^Bio/;
382 s/[\W;]+$//;
383 s/([\w:]+).*/$1/;
384 my $name = $_;
386 next unless $MODULES{$_};
387 $MODULES{$_}->tested(1)
388 unless defined $MODULES{$_} and $MODULES{$_}->tested;
390 next if $MODULES{$name}->name eq "Bio::SeqIO::abi"; # exception: requires bioperl ext package
391 next if $MODULES{$name}->name eq "Bio::SeqIO::ctf"; # exception: requires bioperl ext package
392 next if $MODULES{$name}->name eq "Bio::SeqIO::exp"; # exception: requires bioperl ext package
393 next if $MODULES{$name}->name eq "Bio::SeqIO::pln"; # exception: requires bioperl ext package
394 next if $MODULES{$name}->name eq "Bio::SeqIO::ztr"; # exception: requires bioperl ext package
395 # print $MODULES{$name}->name, "\n";
396 # print Dumper $MODULES{$name};
398 _used_and_super($name);
402 foreach ( sort keys %MODULES) {
404 # skip some name spaces
405 next if /^Bio::Search/; # Bio::Search and Bio::SearchIO are extensively tested
406 # but classes are used by attribute naming
408 print "$_\n" if
409 $MODULES{$_}->type eq 'instance' and ($MODULES{$_}->tested == 0) ;
414 =item B<-i | --info> class
416 Dumps information about a class given as an argument.
418 =cut
420 sub info {
421 my $class = shift;
422 die "" unless $class;
423 #print Dumper $MODULES{$class};
424 my $c = $MODULES{$class};
425 print $c->name, "\n";
426 printf " Type:\n\t%s\n", $c->type;
427 print " Superclasses:\n";
428 foreach (sort $c->each_superclass) {
429 print "\t$_\n";
431 print " Used classes:\n";
432 foreach (sort $c->each_used_class) {
433 print "\t$_\n";
438 =item B<-i | --inherit>
440 Finds interface modules which inherit from an instantiable class.
442 Could be extended to check other bad inheritance patterns.
444 =cut
446 sub inherit {
447 foreach ( sort keys %MODULES) {
448 my $c=$MODULES{$_};
449 next unless $c->type =~ /interface/;
450 foreach my $super ($c->each_superclass) {
451 next if $super =~ /I$/;
452 print "Check this inheritance: ", $c->name, " <-- $super\n";
457 =item B<-s | --synopsis>
459 Test SYNOPSIS section of bioperl modules for runnability
461 =cut
463 sub synopsis {
464 foreach ( sort keys %MODULES) {
465 my $c=$MODULES{$_};
467 next unless $c->type eq "instance";
468 next if $c->name eq 'Bio::Root::Version';
469 next if $c->name eq 'Bio::Tools::HMM';
471 my $synopsis = '';
472 open my $F, '<', $c->path or warn "Could not read file '".$c->name."': $!\n" && return;
474 my $flag = 0;
475 while (<$F>) {
476 last if $flag && /^=/;
477 $synopsis .= $_ if $flag;
478 $flag = 1 if /^=head1 +SYNOPSIS/;
481 # remove comments
482 $synopsis =~ s/[^\$]#[^\n]*//g;
483 # allow linking to an other Bio module, e.g.: See L<Bio::DB::GFF>.
484 $synopsis =~ s/[^\n]*L<Bio[^\n]*//g;
485 # protect single quotes
486 $synopsis =~ s/'/"/g;
488 my $res = `perl -ce '$synopsis' 2>&1 `;
489 next if $res =~ /syntax OK/;
490 print $c->path, "\n";
491 print $synopsis;
492 print $res;
493 print "-" x 70, "\n";
494 # print "SYNOPSIS not runnable\n";
495 close $F;
499 =item B<-v | --version>
501 Test the VERSION of the module against the global one set in
502 Bio::Root::Variation. Print out the different ones.
504 =cut
506 sub version {
508 use Bio::Root::Version;
509 my $version = $Bio::Root::Version::VERSION;
511 my %skip = ( # these are defined together with an other module
512 # and can not be use independently
513 'Bio::AnalysisI::JobI' => 1,
514 'Bio::PrimarySeq::Fasta' => 1,
515 'Bio::DB::Fasta::Stream' => 1,
516 'Bio::DB::GFF::ID_Iterator' => 1,
517 'Bio::DB::GFF::Adaptor::dbi::faux_dbh' =>1,
518 'Bio::LiveSeq::IO::SRS' =>1 # tries to call an external module
521 foreach ( sort keys %MODULES) {
522 my $n=$MODULES{$_}->name;
523 next if $skip{$n};
524 my $vv= "\$${n}::VERSION";
525 my $v = `perl -we 'use $n; print $vv;'`;
526 printf "%50s %-3s\n", $n, $v unless $version eq $v;
530 __END__
532 =item B<-? | -h | --help>
534 This help text.
536 =back
538 =head1 FEEDBACK
540 =head2 Mailing Lists
542 User feedback is an integral part of the evolution of this and other
543 Bioperl modules. Send your comments and suggestions preferably to
544 the Bioperl mailing list. Your participation is much appreciated.
546 bioperl-l@bioperl.org - General discussion
547 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
549 =head2 Reporting Bugs
551 Report bugs to the Bioperl bug tracking system to help us keep track
552 of the bugs and their resolution. Bug reports can be submitted via the
553 web:
555 https://github.com/bioperl/bioperl-live/issues
557 =head1 AUTHOR
559 Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
561 =head1 Contributors
563 Albert Vilella, avilella-AT-gmail-DOT-com
565 =cut