From d5d4c3fed8d868feec529041b0a16866e61b24eb Mon Sep 17 00:00:00 2001 From: =?utf8?q?Carn=C3=AB=20Draug?= Date: Thu, 13 Sep 2018 18:01:30 +0100 Subject: [PATCH] Bring base classes from BioPerl-run back to BioPerl. This means that new Bio::Tools::Run distributions won't have to be dependent on the whole of BioPerl-Run. Imported from bioperl-run 0c35b4339cb18ba21baf0d3c30b6145174e4fcff --- Bio/Tools/Run/Analysis.pm | 790 +++++++++++++++++ Bio/Tools/Run/AnalysisFactory.pm | 359 ++++++++ Bio/Tools/Run/WrapperBase.pm | 511 +++++++++++ Bio/Tools/Run/WrapperBase/CommandExts.pm | 1404 ++++++++++++++++++++++++++++++ Changes | 8 + 5 files changed, 3072 insertions(+) create mode 100644 Bio/Tools/Run/Analysis.pm create mode 100644 Bio/Tools/Run/AnalysisFactory.pm create mode 100644 Bio/Tools/Run/WrapperBase.pm create mode 100644 Bio/Tools/Run/WrapperBase/CommandExts.pm diff --git a/Bio/Tools/Run/Analysis.pm b/Bio/Tools/Run/Analysis.pm new file mode 100644 index 000000000..e9851f3c5 --- /dev/null +++ b/Bio/Tools/Run/Analysis.pm @@ -0,0 +1,790 @@ +# +# BioPerl module for Bio::Tools::Run::Analysis +# +# Please direct questions and support issues to +# +# Cared for by Martin Senger +# For copyright and disclaimer see below. +# + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Tools::Run::Analysis - Module representing any (remote or local) +analysis tool + +=head1 SYNOPSIS + + # run analysis 'seqret' using a default location and a default + # access method (which means using a Web Service at EBI) + use Bio::Tools::Run::Analysis; + print new Bio::Tools::Run::Analysis (-name => 'edit::seqret') + ->wait_for ({ sequence_direct_data => 'tatatacgtatacga', + osformat => 'embl' + }) + ->result ('outseq'); + + # run a longer job without waiting for its completion + use Bio::Tools::Run::Analysis; + my $job = Bio::Tools::Run::Analysis->new(-name => 'edit::seqret') + ->run ({ sequence_direct_data => 'tatatacgtatacga', + osformat => 'embl' + }); + # ...and after a while + $job->result ('outseq'); + + # get all results in the same invocation (as a hash reference + # with result names as keys) - let the module decide which + # results are binary (images in this examples) and save those + # in file (or files); it also shows how to tell that the module + # should read input data from a local file first + use Bio::Tools::Run::Analysis; + my $results = + Bio::Tools::Run::Analysis->new(-name => 'alignment_multiple::prettyplot') + ->wait_for ( { msf_direct_data => '@/home/testdata/my.seq' } ) + ->results ('?'); + use Data::Dumper; + print Dumper ($results); + + # get names, types of all inputs and results, + # get short and detailed (in XML) service description + use Bio::Tools::Run::Analysis; + my $service = Bio::Tools::Run::Analysis->new(-name => 'edit::seqret'); + my $hash1 = $service->input_spec; + my $hash2 = $service->result_spec; + my $hash3 = $service->analysis_spec; + my $xml = $service->describe; + + # get current job status + use Bio::Tools::Run::Analysis; + print new Bio::Tools::Run::Analysis (-name => 'edit::seqret') + ->run ( { #...input data... + } ) + ->status; + + # run a job and print its job ID, keep the job un-destroyed + use Bio::Tools::Run::Analysis; + my $job = + Bio::Tools::Run::Analysis->new(-name => 'edit::seqret', + -destroy_on_exit => 0) + ->run ( { sequence_direct_data => '@/home/testdata/mzef.seq' } ); + print $job->id . "\n"; + # ...it prints (for example): + # edit::seqret/c8ef56:ef535489ac:-7ff4 + + # ...in another time, on another planet, you may say + use Bio::Tools::Run::Analysis; + my $job = + Bio::Tools::Run::Analysis::Job->new(-name => 'edit::seqret', + -id => 'edit::seqret/c8ef56:ef535489ac:-7ff4'); + print join ("\n", + $job->status, + 'Finished: ' . $job->ended (1), # (1) means 'formatted' + 'Elapsed time: ' . $job->elapsed, + $job->last_event, + $job->result ('outseq') + ); + + # ...or you may achieve the same keeping module + # Bio::Tools::Run::Analysis::Job invisible + use Bio::Tools::Run::Analysis; + my $job = + Bio::Tools::Run::Analysis->new(-name => 'edit::seqret') + ->create_job ('edit::seqret/c8ef56:ef535489ac:-7ff4'); + print join ("\n", + $job->status, + # ... + ); + + # ...and later you may free this job resources + $job->remove; + + # + # --- See DESCRIPTION for using generator 'applmaker.pl': + # + + +=head1 DESCRIPTION + +The module represents an access to the local and/or remote analysis +tools in a unified way that allows adding new access methods +(protocols) seamlessly. + +At the moment of writing, there is available a I access to +almost all EMBOSS applications, running at the +European Bioinformatics Institute. + +The documentation of all C methods are to be found +in C. A tutorial (and examples how to call almost all +public methods) is in the script C (go to the C +directory and type C). + +The module C uses general approach allowing to set +arbitrary input data and to retrieve results by naming them. However, +sometimes is more convenient to use a specific module, representing +one analysis tool, that already knows about available input and result +names. Such analyses-specific Perl modules can be generated by +C generator. Its features and usage are documented in +the generator (go to the C directory and type C). + + # this will generate module Seqret.pm + perl papplmaker.PLS -n edit.seqret -m Seqret + + # ...which can be used with data-specific methods + use Seqret; + my $outseq = new Seqret + ->sequence_direct_data ('@/home/testdata/my.seq') + ->osformat ('embl') + ->wait_for + ->outseq + ; + print $outseq; + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/wiki/Mailing_lists - About the mailing lists + +=head2 Support + +Please direct usage questions or support issues to the mailing list: + +I + +rather than to the module maintainer directly. Many experienced and +reponsive experts will be able look at the problem and quickly +address it. Please include a thorough description of the problem +with code and data examples if at all possible. + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via the +web: + + http://redmine.open-bio.org/projects/bioperl/ + +=head1 AUTHOR + +Martin Senger (martin.senger@gmail.com) + +=head1 COPYRIGHT + +Copyright (c) 2003, Martin Senger and EMBL-EBI. +All Rights Reserved. + +This module is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=head1 DISCLAIMER + +This software is provided "as is" without warranty of any kind. + +=head1 SEE ALSO + +=over + +=item * + +http://www.ebi.ac.uk/soaplab/Perl_Client.html + +=back + +=head1 APPENDIX + +Here is the rest of the object methods. Internal methods are preceded +with an underscore _. + +=cut + + +# Let the code begin... + +package Bio::Tools::Run::Analysis; +use vars qw(@ISA $Revision); +use strict; + +use Bio::Root::Root; +use Bio::AnalysisI; +@ISA = qw(Bio::Root::Root Bio::AnalysisI); + +BEGIN { + $Revision = q[$Id$]; +} + +# ----------------------------------------------------------------------------- + +=head2 new + + Usage : my $tool = + Bio::Tools::Run::Analysis->new(-access => 'soap', + -name => 'edit.seqret', + ... + ); + Returns : a new Bio::Tools::Run::Analysis object representing the given tool + Args : There may be additional arguments which are specific + to the access method (see methods 'new' or '_initialize' + of the access-specific implementations (such as module + Bio::Tools::Run::Analysis::soap for a SOAP-based access). + + The recognised and used arguments are: + -access + -location + -name + -httpproxy + -timeout + +It builds, populates and returns a new C object. This +is how it is seen from the outside. But in fact, it builds, populates +and returns a more specific lower-level object, for example +C object - which one it depends on the C<-access> +parameter. + +=over + +=item -access + +It indicates what lower-level module to load. Default is 'soap'. +Other (but future) possibilities may be: + + -access => 'novella' + -access => 'local' + +=item -location + +A location of the service. The contents is access-specific (see +details in the lower-level implementation modules). + +Default is C ( services running +at European Bioinformatics Institute on top of most of EMBOSS +analyses, and on few others). + +=item -name + +A name of an analysis tool, or a name of its higher-level abstraction, +possibly including a category where the analysis belong to. There is +no default value (which usually means that this parameter is mandatory +unless your I<-location> parameter includes also the name (but it is +then access-dependent). + +=item -destroy_on_exit =E '0' + +Default value is '1' which means that all Bio::Tools::Run::Analysis::Job +objects - when being finalised - will send a request +to the remote site to forget the results of these jobs. + +If you change it to '0' make sure that you know the job identification +- otherwise you will not be able to re-established connection with it +(later, when you use your program again). This can be done by calling +method C on the job object (such object is returned by any of +these methods: C, C, C). + +=item -httpproxy + +In addition to the I parameter, you may need to specify also +a location/URL of an HTTP proxy server (if your site requires +one). The expected format is C. There is no +default value. It is also an access-specific parameter which may not +be used by all access methods. + +=item -timeout + +For long(er) running jobs the HTTP connection may be time-outed. In +order to avoid it (or, vice-versa, to call timeout sooner) you may +specify C with the number of seconds the connection will be +kept alive. Zero means to keep it alive forever. The default value is +two minutes. + +=back + +=cut + +sub new { + my ($caller,@args) = @_; + my $class = ref($caller) || $caller; + + if ($class eq 'Bio::Tools::Run::Analysis') { + + # this is called only the first time when somebody calls: 'new + # Bio::Tools::Run::Analysis (...)', and it actually loads a 'real-work-doing' + # module and call this new() method again (unless the loaded + # module has its own new() method) + + my %param = @args; + @param { map { lc $_ } keys %param } = values %param; # lowercase keys + my $access = + $param {'-access'} || # use -access parameter + &Bio::Tools::Run::Analysis::Utils::_guess_access ( \%param ) || # or guess from other parameters + 'soap'; # or use a default access method + $access = "\L$access"; # normalize capitalization to lower case + + # remember the access method (putting it into @args means that the + # object - when created - will remember it) + push (@args, (-access => $access)) unless $param {'-access'}; + + # load module with the real implementation - as defined in $access + return undef unless (&Bio::Tools::Run::Analysis::Utils::_load_access_module ($access)); + + # this calls this same method new() - but now its object part + # (see the upper branche above) is called + return "Bio::Tools::Run::Analysis::$access"->new (@args); + + } else { + + # if $caller is an object, or if it is an underlying + # 'real-work-doing' class (e.g. Bio::Tools::Run::Analysis::soap) then + # we want to call SUPER to create and bless a new object + + my ($self) = $class->SUPER::new (@args); + + # now the $self is an empty object - we will populate it from + # the $caller - if $caller is an object (so we do cloning here) + + if (ref ($caller)) { + %{ $self } = %{ $caller }; + } + + # and finally add values from '@args' into the newly created + # object (the values will overwrite the values copied above); + # this is done by calling '_initialize' of the 'real-work-doing' + # class (if there is no one there, there is always an empty one + # in Bio::Root::Root) + + $self->_initialize (@args); + return $self; + } + +} + +# +# Create a hash with named inputs, all extracted +# from the given data. +# +sub _prepare_inputs { + my $self = shift; + my %inputs = (); # collect here input data + + foreach my $input (@_) { + + next unless defined $input; + + # an element can be an array reference + # (with scalar elements: 'name = [[@]value]') + if (ref $input eq 'ARRAY') { + foreach my $elem (@$input) { + unless (ref $elem) { # taking only scalars + my ($name, $value) = split (/\s*=\s*/, $elem, 2); + next unless $name; # am I paranoid ? + $value = 1 unless defined $value; + $inputs{$name} = $value; + next; + } + } + } + + # ...or an element can be a hash + # (name => [@]value) + elsif (ref $input eq 'HASH') { + foreach my $name (keys %$input) { + my $value = $$input{$name}; + $inputs{$name} = $value; + } + } + + # ...or an element can be a scalar (which means that it + # represents a name of a boolean parameter (an option) + elsif (ref \$input eq 'SCALAR') { + $input =~ s/^@/\\@/; # this cannot be a filename + $inputs{$input} = 1; + } + + # everything else is ignored + else { + warn "Unrecognized input data type: $input\n"; + } + } + + # extracted inputs may be actually filenames and we want the + # contents of the files instead + # TBD: to support also filehandlers here? + foreach my $name (keys %inputs) { + $inputs{$name} = $self->_read_value ($inputs{$name}); + } + return \%inputs; +} + +# --- if a $value is a filename, read it and return its contents +# otherwise return the $value itself; if $value start with +# an escaped '@', change it to a normal '@' +sub _read_value { + my ($self, $value) = @_; + return unless defined $value; + if ($value =~ s/^\@//) { + my ($buf); + open (DATA, $value) || $self->throw ("Cannot read from '$value' ($!)"); + binmode (DATA); + undef $value; + while (read (DATA, $buf, 8 * 2**10)) { + $value .= $buf; + } + close DATA; + } elsif ($value =~ s/^\\\@/@/) { + } + $value; +} + +# --- save $value of result $name into file $filename + $seq; +# use some default filename if $filename not given + +#$part = $self->_save_result (-value => $part, +# -name => $name, +# -filename => $filename, +# -template => $template, +# -seq => $seq++); + +sub _save_result { + my ($self, %params) = @_; + my $name = $params{'-name'} || 'result'; + + # invent filename (if not given) from the given or default template + my $filename = $params{'-filename'}; + unless ($filename) { + $filename = $params{'-template'}; + $filename = "\$ANALYSIS_*_$name" unless $filename; + + # replace $ANALYSIS and $RESULT in the filename + if ($filename =~ /\$\{?ANALYSIS\}?/) { + # (better to ask if we need it because getting + # the analysis name may require going to server) + my $analysis = $self->analysis_name; + $analysis =~ s/[:\/]/_/g; # would be troubles in filename + $filename =~ s/\$\{?ANALYSIS\}?/$analysis/ig; + } + $filename =~ s/\$\{?RESULT\}?/$name/ig; + } + + # include the sequential number before file extension (if any) + my $seq = $params{'-seq'}; + if ($seq) { + my $pos = rindex ($filename, '.'); + if ($pos > -1) { + substr ($filename, $pos, 0) = ".$seq"; # insert $seq + } else { + $filename .= ".$seq"; # add $seq + } + } + + # replace '*' in filename with a unique number + while ($filename =~ /\*/) { + my $unique_name; + my $number = 1; + while (1) { + ($unique_name = $filename) =~ s/\*/$number/; + last unless -e $unique_name; + $number++; + } + $filename = $unique_name; + } + + # and finally write the file + open (DATA, ">$filename") || + $self->throw ("Error by saving result '$name' into '$filename' ($!)"); + binmode (DATA); + print (DATA $params{'-value'}) || + $self->throw ("Error by writing result '$name' into '$filename' ($!)"); + close DATA || + $self->throw ("Error by closing result '$name' in '$filename' ($!)"); + + return $filename; +} + + +=head2 VERSION and Revision + + Usage : print $Bio::Tools::Run::Analysis::VERSION; + print $Bio::Tools::Run::Analysis::Revision; + +=cut + +# ----------------------------------------------------------------------------- +# Bio::Tools::Run::Analysis::Job +# A module representing an invocation (execution, job) of an analysis. +# ----------------------------------------------------------------------------- + +package Bio::Tools::Run::Analysis::Job; + +=head1 Module Bio::Tools::Run::Analysis::Job + +It represents a job, a single execution of an analysis tool. Usually +you do not instantiate these objects - they are returned by methods +C, C, and C of C object. + +However, if you wish to re-create a job you need to know its ID +(method C gives it to you). The ID can be passed directly to the +C method, or again you may use C of a +C object with the ID as parameter. See SYNOPSIS above +for an example. + +Remember that all public methods of this module are described in +details in interface module C and in the tutorial in +the C script. + +=cut + + +use vars qw(@ISA); +use strict; + +use Bio::Root::Root; +@ISA = qw(Bio::Root::Root Bio::AnalysisI::JobI); + +# ----------------------------------------------------------------------------- + +=head2 new + + Usage : my $job = Bio::Tools::Run::Analysis::Job->new + (-access => 'soap', + -name => 'edit.seqret', + -id => 'xxxyyy111222333' + ); + Returns : a re-created object representing a job + Args : The same arguments as for Bio::Tools::Run::Analysis object: + -access + -location + -name + -httpproxy + -timeout + (and perhaps others) + Additionally and specifically for this object: + -id + -analysis + +=over + +=item -id + +A job ID created some previous time and now used to re-create the same +job (in order to re-gain access to this job results, for example). + +=item -analysis + +A C object whose properties (such as C<-access> and +C<-location> are used to re-create this job object. + +=back + +=cut + +sub new { + my ($caller, @args) = @_; + my $class = ref($caller) || $caller; + + if ($class eq 'Bio::Tools::Run::Analysis::Job') { + + # this is called only the first time when somebody calls: + #'Bio::Tools::Run::Analysis::Job->new(...)' + + my %param = @args; + @param { map { lc $_ } keys %param } = values %param; # lowercase keys + if ($param {'-analysis'}) { + + # usually a new Job object is created from an existing + # Analysis object - which means that the Analysis already + # loaded a 'real-work-doing' Job object, so we need just + # to create a Job object (by calling its new() method, + # which calls actually this new() method again - but its + # 'object' part - see below + + my $analysis = $param {'-analysis'}; + return undef unless $analysis->{'_access'}; # TBD: error message here? + my $access = $analysis->{'_access'}; + return "Bio::Tools::Run::Analysis::Job::$access"->new (@args); + + } else { + + # if a new Job object is created directly (by a user, not + # by a parent Analysis object) we need to create the + # Analysis object first (because it is the Analysis object + # who knows how to contact the underlying analysis tool), + # and only then let the Analysis create this Job object + # (which may be an empty Job - if there is no 'id' in @args) + + return new Bio::Tools::Run::Analysis (@args)->create_job ($param {'-id'}); + } + + } else { + + # if $caller is an object, or if it is an underlying + # 'real-work-doing' class (e.g. Bio::Tools::Run::Analysis::Job::soap) then + # we want to call SUPER to create and bless a new object + + my ($self) = $class->SUPER::new (@args); + + # now the $self is an empty object - we will populate it from + # the $caller - if $caller is an object (so we do cloning here) + + if (ref ($caller)) { + %{ $self } = %{ $caller }; + } + + # and finally add values from '@args' into the newly created + # object (the values will overwrite the values copied above); + # this is done by calling '_initialize' of the 'real-work-doing' + # class (if there is no one there, there is always an empty one + # in Bio::Root::Root) + + $self->_initialize (@args); + return $self; + } + +} + +sub id { shift->{'_id'}; } + +# --------------------------------------------------------------------- +# +# A Utility module... +# +# --------------------------------------------------------------------- + +package Bio::Tools::Run::Analysis::Utils; + +=head1 Module Bio::Tools::Run::Analysis::Utils + +It contains several general utilities. These are C, not +methods. Therefore call them like, for example: + + &Bio::Tools::Run::Analysis::Utils::format_time (...); + +=cut + +# ----------------------------------------------------------------------------- + +=head2 format_time + + Usage : Bio::Tools::Run::Analysis::Utils::format_time ($time); + Returns : Slightly formatted $time + Args : $time is number of seconds from the beginning of Epoch + +It returns what C returns which means that return value is +different in the array and scalar context (see localtime). If C<$time> +is ``-1'' it returns 'n/a' (in the scalar context) or an empty array +(in the array context). If C<$time> is too small to represent the +distance from the beginning of the Epoch, it returns it unchanged (the +same in any contex) - this is reasonable for C<$time> representing an +elapsed time. + +The function is used to format times coming back from various job time +methods. + +=cut + +sub format_time { + my $time = shift; + return wantarray ? () : 'n/a' if "$time" eq '-1'; + return $time if $time < 1000000000; + return localtime $time; +} + +# ----------------------------------------------------------------------------- + +# It processes given result names which may be of various different +# types and returns a hash reference with result names as keys and +# values being result destinations (such as file names, or templates +# how to create filenames. +# +# Or, it returns a scalar ('@[template]' or '?[template]') if there +# were no real result names but only a global rule how to create +# result destinantions for all results. +# +# Or, it returns 'undef' if there were no result names at all. + +sub normalize_names { + return undef unless @_; + my %names = (); + foreach (@_) { + if (ref $_ eq 'HASH') { + %names = (%names, %$_); + } elsif (not ref $_) { + my ($name, $dest) = split (/\s*=\s*/, $_, 2); + return $name if $name =~ /^@/; # special: it nullifies other rules + return $name if $name =~ /^\?/; # ditto + $names{$name} = $dest; # $dest may be undef + } + } + \%names; +} + +# ----------------------------------------------------------------------------- + +=head2 _load_access_module + + Usage : $class->_load_access_module ($access) + Returns : 1 on success, undef on failure + Args : 'access' should contain the last part of the + name of a module who does the real implementation + +It does (in the run-time) a similar thing as + + require Bio::Tools::Run::Analysis::$access + +It prints an error on STDERR if it fails to find and load the module +(for example, because of the compilation errors in the module). + +=cut + +sub _load_access_module { + my ($access) = @_; + + my $load = "Bio/Tools/Run/Analysis/$access.pm"; + eval { + require $load; + }; + + if ( $@ ) { + Bio::Root::Root->throw (<. Rememeber that this method is called only +if there was no I<-access> parameter which could tell directly what +access method to use. + +=cut + +sub _guess_access { + my ($rh_params) = @_; + return undef; +} + + + +1; +__END__ diff --git a/Bio/Tools/Run/AnalysisFactory.pm b/Bio/Tools/Run/AnalysisFactory.pm new file mode 100644 index 000000000..c0407ce76 --- /dev/null +++ b/Bio/Tools/Run/AnalysisFactory.pm @@ -0,0 +1,359 @@ +# +# BioPerl module for Bio::Tools::Run::AnalysisFactory +# +# Please direct questions and support issues to +# +# Cared for by Martin Senger +# For copyright and disclaimer see below. +# + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Tools::Run::AnalysisFactory - A directory of analysis tools + +=head1 SYNOPSIS + + # list all available analyses from the default location, + # using a default (SOAP) access method + use Bio::Tools::Run::AnalysisFactory; + my $list = Bio::Tools::Run::AnalysisFactory->new(); + ->available_analyses; + use Data::Dumper; print Dumper ($list); + + # ditto, but from a different location + use Bio::Tools::Run::AnalysisFactory; + my $list = + Bio::Tools::Run::AnalysisFactory->new(-location => 'http://somewhere/something') + ->available_analyses; + + # ...and using a different access method + # (this example is not yet impelmented) + use Bio::Tools::Run::AnalysisFactory; + my $list = + Bio::Tools::Run::AnalysisFactory->new(-location => 'http://somewhere/something', + -access => 'novella') + ->available_analyses; + + # list available categories of analyses + use Bio::Tools::Run::AnalysisFactory; + my $categories = + Bio::Tools::Run::AnalysisFactory->new(); + ->available_categories; + use Data::Dumper; print Dumper ($categories); + + # show all analyses group by categories + use Bio::Tools::Run::AnalysisFactory; + my $factory = Bio::Tools::Run::AnalysisFactory->new(); + foreach $cat ( @{ $factory->available_categories } ) { + my @sublist = @{ $factory->available_analyses ($cat) }; + print "$cat:\n\t", + join ("\n\t", @{ $factory->available_analyses ($cat) }), + "\n"; + } + + # create an analysis object + use Bio::Tools::Run::AnalysisFactory; + $service = Bio::Tools::Run::AnalysisFactory->new(); + ->create_analysis ('edit.seqret'); + $service->run ( + #... + )->results; + +=head1 DESCRIPTION + +The module represents a list of available analysis tools from a given +location using a given access method. Additionally, for any of the +available analyses, it can create an object of type C. + +The module is a higher-level abstraction whose main job is to load a +'real-work-doing' implementation. Which one is used, it depends on the +C<-access> parameter. The same design is used here as for +C module. + +There is available a I access to almost all EMBOSS applications, +running at European Bioinformatics Institute. + +The documentation of all C methods are to be found +in C. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/wiki/Mailing_lists - About the mailing lists + +=head2 Support + +Please direct usage questions or support issues to the mailing list: + +I + +rather than to the module maintainer directly. Many experienced and +reponsive experts will be able look at the problem and quickly +address it. Please include a thorough description of the problem +with code and data examples if at all possible. + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via the +web: + + http://redmine.open-bio.org/projects/bioperl/ + +=head1 AUTHOR + +Martin Senger (martin.senger@gmail.com) + +=head1 COPYRIGHT + +Copyright (c) 2003, Martin Senger and EMBL-EBI. +All Rights Reserved. + +This module is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +=head1 DISCLAIMER + +This software is provided "as is" without warranty of any kind. + +=head1 SEE ALSO + +=over 4 + +=item * + +http://www.ebi.ac.uk/soaplab/Perl_Client.html + +=back + +=head1 APPENDIX + +Here is the rest of the object methods. Internal methods are preceded +with an underscore _. + +=cut + + +# Let the code begin... + +package Bio::Tools::Run::AnalysisFactory; +use vars qw(@ISA $Revision); +use strict; + +use Bio::Root::Root; +use Bio::Factory::AnalysisI; +@ISA = qw(Bio::Root::Root Bio::Factory::AnalysisI); + + +BEGIN { + $Revision = q$Id$; +} + +# ----------------------------------------------------------------------------- + +# Available (understood) parameters: +# -access +# (+ parameters used in guessing an access) + +# ----------------------------------------------------------------------------- + +=head2 new + + Usage : my $factory = + Bio::Tools::Run::AnalysisFactory->new(-access => 'soap', + -location => 'http://...'); + Returns : a new Bio::Tools::Run::AnalysisFactory object representing a list + of available analyses + Args : There may be additional arguments which are specific + to the access method (see methods 'new' or '_initialize' + of the access-specific implementations (such as module + Bio::Tools::Run::AnalysisFactory::soap for a SOAP-based access). + + The recognised and used arguments are: + -access + -location + -httpproxy + -timeout + +It builds, populates and returns a new C object. This +is how it is seen from the outside. But in fact, it builds, populates +and returns a more specific lower-level object, for example +C object - which one it is it depends on the C<-access> +parameter. + +=over 4 + +=item -access + +It indicates what lower-level module to load. Default is 'soap'. +Other (but future) possibilities are: + + -access => 'novella' + -access => 'local' + +=item -location + +A location of the service. The contents is access-specific (see +details in the lower-level implementation modules). + +Default is C (there are +services running at European Bioinformatics Institute on top of most +of EMBOSS analyses, and on some others). + +=item -httpproxy + +In addition to the I parameter, you may need to specify also +a location/URL of an HTTP proxy server (if your site requires +one). The expected format is C. There is no +default value. It is also an access-specific parameter which may not +be used by all access methods. + +=item -timeout + +For long(er) running jobs the HTTP connection may be time-outed. In +order to avoid it (or, vice-versa, to call timeout sooner) you may +specify C with the number of seconds the connection will be +kept alive. Zero means to keep it alive forever. The default value is +two minutes. + +=back + +=cut + +sub new { + my ($caller,@args) = @_; + my $class = ref($caller) || $caller; + + if ($class eq 'Bio::Tools::Run::AnalysisFactory') { + + # this is called only the first time when somebody calls: 'new + # Bio::Tools::Run::AnalysisFactory (...)', and it actually loads a + # 'real-work-doing' module and call this new() method again + # (unless the loaded module has its own new() method) + + my %param = @args; + @param { map { lc $_ } keys %param } = values %param; # lowercase keys + my $access = + $param {'-access'} || # use -access parameter + $class->_guess_access ( \%param ) || # or guess from other parameters + 'soap'; # or use a default access method + $access = "\L$access"; # normalize capitalization to lower case + + # remember the access method (putting it into @args means that the + # object - when created - will remember it) + push (@args, (-access => $access)) unless $param {'-access'}; + + # load module with the real implementation - as defined in $access + return undef unless (&_load_access_module ($access)); + + # this calls this same method new() - but now its object part + # (see the upper branche above) is called + return "Bio::Tools::Run::AnalysisFactory::$access"->new (@args); + + } else { + + # if $caller is an object, or if it is an underlying + # 'real-work-doing' class (e.g. Bio::Tools::Run::AnalysisFactory::soap) + # then we want to call SUPER to create and bless a new object + + my ($self) = $class->SUPER::new (@args); + + # now the $self is an empty object - we will populate it from + # the $caller - if $caller is an object (so we do cloning here) + + if (ref ($caller)) { + %{ $self } = %{ $caller }; + } + + # and finally add values from '@args' into the newly created + # object (the values will overwrite the values copied above); + # this is done by calling '_initialize' of the 'real-work-doing' + # class (if there is no one there, there is always an empty one + # in Bio::Root::Root) + + $self->_initialize (@args); + return $self; + } + +} + +# ----------------------------------------------------------------------------- + +=head2 _load_access_module + + Usage : $class->_load_access_module ($access) + Returns : 1 on success, undef on failure + Args : 'access' should contain the last part of the + name of a module who does the real implementation + +It does (in the run-time) a similar thing as + + require Bio::Tools::Run::AnalysisFactory::$access + +It prints an error on STDERR if it fails to find and load the module +(for example, because of the compilation errors in the module). + +=cut + +sub _load_access_module { + my ($access) = @_; + + my $load = "Bio/Tools/Run/AnalysisFactory/$access.pm"; + eval { + require $load; + }; + + if ( $@ ) { + Bio::Root::Root->throw (<_guess_access ($rh_params) + Returns : string with a guessed access protocol (e.g. 'soap'), + or undef if the guessing failed + Args : 'rh_params' is a hash reference containing parameters given + to the 'new' method. + +It makes an expert guess what kind of access/transport protocol should +be used to access the underlying analysis. The guess is based on the +parameters in I. Rememeber that this method is called only +if there was no I<-access> parameter which could tell directly what +access method to use. + +=cut + +sub _guess_access { + my ($class, $rh_params) = @_; + return undef; +} + +# ----------------------------------------------------------------------------- + +=head2 VERSION and Revision + + Usage : print $Bio::Tools::Run::AnalysisFactory::VERSION; + print $Bio::Tools::Run::AnalysisFactory::Revision; + +=cut + +1; +__END__ diff --git a/Bio/Tools/Run/WrapperBase.pm b/Bio/Tools/Run/WrapperBase.pm new file mode 100644 index 000000000..74efe37fe --- /dev/null +++ b/Bio/Tools/Run/WrapperBase.pm @@ -0,0 +1,511 @@ +# +# BioPerl module for Bio::Tools::Run::WrapperBase +# +# Please direct questions and support issues to +# +# Cared for by Jason Stajich +# +# Copyright Jason Stajich +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Tools::Run::WrapperBase - A Base object for wrappers around executables + +=head1 SYNOPSIS + + # do not use this object directly, it provides the following methods + # for its subclasses + + my $errstr = $obj->error_string(); + my $exe = $obj->executable(); + $obj->save_tempfiles($booleanflag) + my $outfile= $obj->outfile_name(); + my $tempdir= $obj->tempdir(); # get a temporary dir for executing + my $io = $obj->io; # Bio::Root::IO object + my $cleanup= $obj->cleanup(); # remove tempfiles + + $obj->run({-arg1 => $value}); + +=head1 DESCRIPTION + +This is a basic module from which to build executable wrapper modules. +It has some basic methods to help when implementing new modules. + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion + http://bioperl.org/wiki/Mailing_lists - About the mailing lists + +=head2 Support + +Please direct usage questions or support issues to the mailing list: + +I + +rather than to the module maintainer directly. Many experienced and +reponsive experts will be able look at the problem and quickly +address it. Please include a thorough description of the problem +with code and data examples if at all possible. + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track of +the bugs and their resolution. Bug reports can be submitted via the +web: + + https://github.com/bioperl/bioperl-live/issues + +=head1 AUTHOR - Jason Stajich + +Email jason-at-bioperl.org + +=head1 CONTRIBUTORS + +Sendu Bala, bix@sendu.me.uk + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + + +# Let the code begin... + + +package Bio::Tools::Run::WrapperBase; +use strict; + +# Object preamble - inherits from Bio::Root::Root + +use base qw(Bio::Root::Root); + +use File::Spec; +use File::Path qw(); # don't import anything + +=head2 run + + Title : run + Usage : $wrapper->run({ARGS HERE}); + Function: Support generic running with args passed in + as a hashref + Returns : Depends on the implementation, status OR data + Args : hashref of named arguments + + +=cut + +sub run { + my ($self,@args) = @_; + $self->throw_not_implemented(); +} + + +=head2 error_string + + Title : error_string + Usage : $obj->error_string($newval) + Function: Where the output from the last analysis run is stored. + Returns : value of error_string + Args : newvalue (optional) + + +=cut + +sub error_string{ + my ($self,$value) = @_; + if( defined $value) { + $self->{'_error_string'} = $value; + } + return $self->{'_error_string'} || ''; +} + +=head2 arguments + + Title : arguments + Usage : $obj->arguments($newval) + Function: Commandline parameters + Returns : value of arguments + Args : newvalue (optional) + + +=cut + +sub arguments { + my ($self,$value) = @_; + if(defined $value) { + $self->{'_arguments'} = $value; + } + return $self->{'_arguments'} || ''; +} + + +=head2 no_param_checks + + Title : no_param_checks + Usage : $obj->no_param_checks($newval) + Function: Boolean flag as to whether or not we should + trust the sanity checks for parameter values + Returns : value of no_param_checks + Args : newvalue (optional) + + +=cut + +sub no_param_checks{ + my ($self,$value) = @_; + if( defined $value || ! defined $self->{'no_param_checks'} ) { + $value = 0 unless defined $value; + $self->{'no_param_checks'} = $value; + } + return $self->{'no_param_checks'}; +} + +=head2 save_tempfiles + + Title : save_tempfiles + Usage : $obj->save_tempfiles($newval) + Function: Get/set the choice of if tempfiles in the temp dir (see tempdir()) + are kept or cleaned up. Default is '0', ie. delete temp files. + NB: This must be set to the desired value PRIOR to first creating + a temp dir with tempdir(). Any attempt to set this after tempdir creation will get a warning. + Returns : boolean + Args : none to get, boolean to set + +=cut + +sub save_tempfiles{ + my $self = shift; + my @args = @_; + if (($args[0]) && (exists ($self->{'_tmpdir'}))) { + $self->warn ("Tempdir already created; setting save_tempfiles will not affect cleanup behavior."); + } + return $self->io->save_tempfiles(@_); +} + +=head2 outfile_name + + Title : outfile_name + Usage : my $outfile = $wrapper->outfile_name(); + Function: Get/Set the name of the output file for this run + (if you wanted to do something special) + Returns : string + Args : [optional] string to set value to + + +=cut + +sub outfile_name{ + my ($self,$nm) = @_; + if( defined $nm || ! defined $self->{'_outfilename'} ) { + $nm = 'mlc' unless defined $nm; + $self->{'_outfilename'} = $nm; + } + return $self->{'_outfilename'}; +} + + +=head2 tempdir + + Title : tempdir + Usage : my $tmpdir = $self->tempdir(); + Function: Retrieve a temporary directory name (which is created) + Returns : string which is the name of the temporary directory + Args : none + + +=cut + +sub tempdir{ + my ($self) = shift; + + $self->{'_tmpdir'} = shift if @_; + unless( $self->{'_tmpdir'} ) { + $self->{'_tmpdir'} = $self->io->tempdir(CLEANUP => ! $self->save_tempfiles ); + } + unless( -d $self->{'_tmpdir'} ) { + mkdir($self->{'_tmpdir'},0777); + } + return $self->{'_tmpdir'}; +} + +=head2 cleanup + + Title : cleanup + Usage : $wrapper->cleanup(); + Function: Will cleanup the tempdir directory + Returns : none + Args : none + + +=cut + +sub cleanup{ + my ($self) = @_; + $self->io->_io_cleanup(); + if( defined $self->{'_tmpdir'} && -d $self->{'_tmpdir'} ) { + my $verbose = ($self->verbose >= 1) ? 1 : 0; + File::Path::rmtree( $self->{'_tmpdir'}, $verbose); + } +} + +=head2 io + + Title : io + Usage : $obj->io($newval) + Function: Gets a Bio::Root::IO object + Returns : Bio::Root::IO object + Args : none + + +=cut + +sub io{ + my ($self) = @_; + unless( defined $self->{'io'} ) { + $self->{'io'} = Bio::Root::IO->new(-verbose => $self->verbose); + } + return $self->{'io'}; +} + +=head2 version + + Title : version + Usage : $version = $wrapper->version() + Function: Returns the program version (if available) + Returns : string representing version of the program + Args : [Optional] value to (re)set version string + + +=cut + +sub version{ + my ($self,@args) = @_; + return; +} + +=head2 executable + + Title : executable + Usage : my $exe = $factory->executable(); + Function: Finds the full path to the executable + Returns : string representing the full path to the exe + Args : [optional] name of executable to set path to + [optional] boolean flag whether or not warn when exe is not found + +=cut + +sub executable { + my ($self, $exe, $warn) = @_; + + if (defined $exe) { + $self->{'_pathtoexe'} = $exe; + } + + unless( defined $self->{'_pathtoexe'} ) { + my $prog_path = $self->program_path; + + if ($prog_path) { + if (-f $prog_path && -x $prog_path) { + $self->{'_pathtoexe'} = $prog_path; + } + elsif ($self->program_dir) { + $self->warn("executable not found in $prog_path, trying system path...") if $warn; + } + } + unless ($self->{'_pathtoexe'}) { + my $exe; + if ( $exe = $self->io->exists_exe($self->program_name) ) { + $self->{'_pathtoexe'} = $exe; + } + else { + $self->warn("Cannot find executable for ".$self->program_name) if $warn; + $self->{'_pathtoexe'} = undef; + } + } + } + + # bail if we never found the executable + unless ( defined $self->{'_pathtoexe'}) { + $self->throw("Cannot find executable for ".$self->program_name . + ". path=\"".$self->program_path."\""); + } + return $self->{'_pathtoexe'}; +} + +=head2 program_path + + Title : program_path + Usage : my $path = $factory->program_path(); + Function: Builds path for executable + Returns : string representing the full path to the exe + Args : none + +=cut + +sub program_path { + my ($self) = @_; + my @path; + push @path, $self->program_dir if $self->program_dir; + push @path, $self->program_name.($^O =~ /mswin/i ? '.exe' : '') if $self->program_name; + return File::Spec->catfile(@path); +} + +=head2 program_dir + + Title : program_dir + Usage : my $dir = $factory->program_dir(); + Function: Abstract get method for dir of program. To be implemented + by wrapper. + Returns : string representing program directory + Args : none + +=cut + +sub program_dir { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 program_name + + Title : program_name + Usage : my $name = $factory->program_name(); + Function: Abstract get method for name of program. To be implemented + by wrapper. + Returns : string representing program name + Args : none + +=cut + +sub program_name { + my ($self) = @_; + $self->throw_not_implemented(); +} + +=head2 quiet + + Title : quiet + Usage : $factory->quiet(1); + if ($factory->quiet()) { ... } + Function: Get/set the quiet state. Can be used by wrappers to control if + program output is printed to the console or not. + Returns : boolean + Args : none to get, boolean to set + +=cut + +sub quiet { + my $self = shift; + if (@_) { $self->{quiet} = shift } + return $self->{quiet} || 0; +} + +=head2 _setparams() + + Title : _setparams + Usage : $params = $self->_setparams(-params => [qw(window evalue_cutoff)]) + Function: For internal use by wrapper modules to build parameter strings + suitable for sending to the program being wrapped. For each method + name supplied, calls the method and adds the method name (as modified + by optional things) along with its value (unless a switch) to the + parameter string + Example : $params = $self->_setparams(-params => [qw(window evalue_cutoff)], + -switches => [qw(simple large all)], + -double_dash => 1, + -underscore_to_dash => 1); + If window() and simple() had not been previously called, but + evalue_cutoff(0.5), large(1) and all(0) had been called, $params + would be ' --evalue-cutoff 0.5 --large' + Returns : parameter string + Args : -params => [] or {} # array ref of method names to call, + or hash ref where keys are method names and + values are how those names should be output + in the params string + -switches => [] or {}# as for -params, but no value is printed for + these methods + -join => string # define how parameters and their values are + joined, default ' '. (eg. could be '=' for + param=value) + -lc => boolean # lc() method names prior to output in string + -dash => boolean # prefix all method names with a single dash + -double_dash => bool # prefix all method names with a double dash + -mixed_dash => bool # prefix single-character method names with a + # single dash, and multi-character method names + # with a double-dash + -underscore_to_dash => boolean # convert all underscores in method + names to dashes + +=cut + +sub _setparams { + my ($self, @args) = @_; + + my ($params, $switches, $join, $lc, $d, $dd, $md, $utd) = + $self->_rearrange([qw(PARAMS + SWITCHES + JOIN + LC + DASH + DOUBLE_DASH + MIXED_DASH + UNDERSCORE_TO_DASH)], @args); + $self->throw('at least one of -params or -switches is required') unless ($params || $switches); + $self->throw("-dash, -double_dash and -mixed_dash are mutually exclusive") if (defined($d) + defined($dd) + defined($md) > 1); + $join ||= ' '; + + my %params = ref($params) eq 'HASH' ? %{$params} : map { $_ => $_ } @{$params}; + my %switches = ref($switches) eq 'HASH' ? %{$switches} : map { $_ => $_ } @{$switches}; + + my $param_string = ''; + for my $hash_ref (\%params, \%switches) { + while (my ($method, $method_out) = each %{$hash_ref}) { + my $value = $self->$method(); + next unless (defined $value); + next if (exists $switches{$method} && ! $value); + + $method_out = lc($method_out) if $lc; + my $method_length = length($method_out) if $md; + $method_out = '-'.$method_out if ($d || ($md && ($method_length == 1))); + $method_out = '--'.$method_out if ($dd || ($md && ($method_length > 1))); + $method_out =~ s/_/-/g if $utd; + + if ( exists $params{$method} ) { + # if value are quoted with " or ', re-quote it + if ( $value =~ m{^[\'\"]+(.+)[\'\"]+$} ) { + $value = '"'. $1 . '"'; + } + # quote values that contain spaces + elsif ( $value =~ m{\s+} ) { + $value = '"'. $value . '"'; + } + } + + $param_string .= ' '.$method_out.(exists $switches{$method} ? '' : $join.$value); + } + } + + return $param_string; +} + +sub DESTROY { + my $self= shift; + unless ( $self->save_tempfiles ) { + $self->cleanup(); + } + $self->SUPER::DESTROY(); +} + + +1; diff --git a/Bio/Tools/Run/WrapperBase/CommandExts.pm b/Bio/Tools/Run/WrapperBase/CommandExts.pm new file mode 100644 index 000000000..1a0e099e2 --- /dev/null +++ b/Bio/Tools/Run/WrapperBase/CommandExts.pm @@ -0,0 +1,1404 @@ +# +# BioPerl module for Bio::Tools::Run::WrapperBase::CommandExts +# +# Please direct questions and support issues to +# +# Cared for by Mark A. Jensen +# +# Copyright Mark A. Jensen +# +# You may distribute this module under the same terms as perl itself + +# POD documentation - main docs before the code + +=head1 NAME + +Bio::Tools::Run::WrapperBase::CommandExts - Extensions to WrapperBase for handling programs with commands *ALPHA* + +=head1 SYNOPSIS + +Devs, see L. +Users, see L. + +=head1 DESCRIPTION + +This is a developer-focused experimental module. The main idea is to +extend L to make it relatively easy to +create run wrappers around I of related programs, like +C or C. + +Some definitions: + +=over + +=item * program + +The program is the command-line frontend application. C, for example, is run from the command line as follows: + + $ samtools view -bS in.bam > out.sam + $ samtools faidx + +=item * command + +The command is the specific component of a suite run by executing the +program. In the example above, C and C are commands. + +=item * command prefix + +The command prefix is an abbreviation of the command name used +internally by C method, and sometimes by the user of the +factory for specifying command line parameters to subcommands of +composite commands. + +=item * composite command + +A composite command is a pipeline or script representing a series of +separate executions of different commands. Composite commands can be +specified by configuring C appropriately; the composite +command can be run by the user from a factory in the same way as +ordinary commands. + +=item * options, parameters, switches and filespecs + +An option is any command-line option; i.e., a specification set off by +a command-line by a specifier (like C<-v> or C<--outfile>). Parameters +are command-line options that accept a value (C<-title mydb>); +switches are boolean flags (C<--no-filter>). Filespecs are barewords +at the end of the command line that usually indicate input or output +files. In this module, this includes files that capture STDIN, STDOUT, +or STDERR via redirection. + +=item * pseudo-program + +A "pseudo-program" is a way to refer to a collection of related +applications that are run independently from the command line, rather +than via a frontend program. The C suite of programs is an +example: C, C, etc. C can be +configured to create a single factory for a suite of related, +independent programs that treats each independent program as a +"pseudo-program" command. + +=back + +This module essentially adds the non-assembler-specific wrapper +machinery of fangly's L to the +L namespace, adding the general +command-handling capability of L. It creates run +factories that are automatically Bio::ParameterBaseI compliant, +meaning that C, C, +C, C, and C +are available. + +=head1 DEVELOPER INTERFACE + +C is currently set up to read particular package globals +which define the program, the commands available, command-line options +for those commands, and human-readable aliases for those options. + +The easiest way to use C is probably to create two modules: + + Bio::Tools::Run::YourRunPkg + Bio::Tools::Run::YourRunPkg::Config + +The package globals should be defined in the C module, and the +run package itself should begin with the following mantra: + + use YourRunPkg::Config; + use Bio::Tools::Run::WrapperBase; + use Bio::Tools::Run::WrapperBase::CommandExts; + sub new { + my $class = shift; + my @args = @_; + my $self = $class->SUPER::new(@args); + ... + return $self; + } + +The following globals can/should be defined in the C module: + + $program_name + $program_dir + $use_dash + $join + @program_commands + %command_prefixes + @program_params + @program_switches + %param_translation + %composite_commands + %command_files + +See L for detailed descriptions. + +The work of creating a run wrapper with C lies mainly in +setting up the globals. The key methods for the developer interface are: + +=over + +=item * program_dir($path_to_programs) + +Set this to point the factory to the executables. + +=item * _run(@file_args) + +Runs an instantiated factory with the given file args. Use in the + C method override. + +=item * _create_factory_set() + +Returns a hash of instantiated factories for each true command from a +composite command factory. The hash keys are the true command names, so +you could do + + $cmds = $composite_fac->_create_factory_set; + for (@true_commands) { + $cmds->{$_}->_run(@file_args); + } + +=item * executables($cmd,[$fullpath]) + +For pseudo-programs, this gets/sets the full path to the executable of +the true program corresponding to the command C<$cmd>. + +=back + +=head2 Implementing Composite Commands + +=head2 Implementing Pseudo-programs + +To indicate that a package wraps disparate programs under a single pseudo program, use an asterisk before the program name: + + package Bio::Tools::Run::YourPkg::Config; + ... + our $program_name = '*blast+'; + +and C<_run> will know what to do. Specify the rest of the globals as +if the desired programs were commands. Use the basename of the +programs for the command names. + +If all the programs can be found in a single directory, just specify +that directory in C. If not, use C to set the paths to each program explicitly: + + foreach (keys %cmdpaths) { + $self->executables($_, $cmdpaths{$_}); + } + +=head2 Config Globals + +Here is an example config file. Further details in prose are below. + + package Dummy::Config; + use strict; + use warnings; + no warnings qw(qw); + use Exporter; + our (@ISA, @EXPORT, @EXPORT_OK); + push @ISA, 'Exporter'; + @EXPORT = qw( + $program_name + $program_dir + $use_dash + $join + @program_commands + %command_prefixes + @program_params + @program_switches + %param_translation + %command_files + %composite_commands + ); + + our $program_name = '*flurb'; + our $program_dir = 'C:\cygwin\usr\local\bin'; + our $use_dash = 'mixed'; + our $join = ' '; + + our @program_commands = qw( + rpsblast + find + goob + blorb + multiglob + ); + + our %command_prefixes = ( + blastp => 'blp', + tblastn => 'tbn', + goob => 'g', + blorb => 'b', + multiglob => 'm' + ); + + our @program_params = qw( + command + g|narf + g|schlurb + b|scroob + b|frelb + m|trud + ); + + our @program_switches = qw( + g|freen + b|klep + ); + + our %param_translation = ( + 'g|narf' => 'n', + 'g|schlurb' => 'schlurb', + 'g|freen' => 'f', + 'b|scroob' => 's', + 'b|frelb' => 'frelb' + ); + + our %command_files = ( + 'goob' => [qw( fas faq )], + ); + + our %composite_commands = ( + 'multiglob' => [qw( blorb goob )] + ); + 1; + +C<$use_dash> can be one of C, C, or C. See L. + +There is a syntax for the C<%command_files> specification. The token +matching C<[a-zA-Z0-9_]+> in each element of each arrayref becomes the +named filespec parameter for the C<_run()> method in the wrapper +class. Additional symbols surrounding this token indicate how this +argument should be handled. Some examples: + + >out : stdout is redirected into the file + specified by (..., -out => $file,... ) + $file,... ) + 2>log : stderr is redirected into the file + specified by (..., -log => $file,... ) + #opt : this filespec argument is optional + (no throw if -opt => $option is missing) + 2>#log: if -log is not specified in the arguments, the stderr() + method will capture stderr + *lst : this filespec can take multiple arguments, + specify using an arrayref (..., -lst => [$file1, $file2], ...) + *#lst : an optional list + +The tokens above are examples; they can be anything matching the above regexp. + +=head1 USER INTERFACE + +Using a wrapper created with C: + +=over + +=item * Getting a list of available commands, parameters, and filespecs: + +To get a list of commands, simply: + + @commands = Bio::Tools::Run::ThePkg->available_commands; + +The wrapper will generally have human-readable aliases for each of the +command-line options for the wrapped program and commands. To obtain a +list of the parameters and switches available for a particular +command, do + + $factory = Bio::Tools::Run::ThePkg->new( -command => 'glurb' ); + @params = $factory->available_parameters('params'); + @switches = $factory->available_parameters('switches'); + @filespec = $factory->available_parameters('filespec'); + @filespec = $factory->filespec; # alias + +=item * Create factories + +The factory is a handle on the program and command you wish to +run. Create a factory using C to set command-line parameters: + + $factory = Bio::Tools::Run::ThePkg->new( -command => 'glurb', + -freen => 1, + -furschlugginer => 'vreeble' ); + +A shorthand for this is: + + $factory = Bio::Tools::Run::ThePkg->new_glurb( + -freen => 1, + -furschlugginer => 'vreeble' ); + +=item * Running programs + +To run the program, use the C method, providing filespecs as arguments + + $factory = Bio::Tools::Run::ThePkg->new_assemble( -min_qual => 63 ); + $factory->run( -faq1 => 'read1.fq', -faq2 => 'read2.fq', + -ref => 'refseq.fas', -out => 'new.sam' ); + # do another + $factory->run( -faq1 => 'read-old1.fq', -faq2 => 'read-old2.fq', + -ref => 'refseq.fas', -out => 'old.sam' ); + +Messages on STDOUT and STDERR are dumped into their respective attributes: + + $stdout = $factory->stdout; + $stderr = $factory->stderr; + +unless STDOUT and/or STDERR are part of the named files in the filespec. + +=item * Setting/getting/resetting/polling parameters. + +A C-based factory is always L +compliant. That means that you may set, get, and reset parameters +using C, C, and +C. You can ask whether parameters have changed since +they were last accessed by using the predicate +C. See L for more details. + +Once set, parameters become attributes of the factory. Thus, you can get their values as follows: + + if ($factory->freen) { + $furs = $factory->furshlugginer; + #... + } + +=back + +=head1 FEEDBACK + +=head2 Mailing Lists + +User feedback is an integral part of the evolution of this and other +Bioperl modules. Send your comments and suggestions preferably to +the Bioperl mailing list. Your participation is much appreciated. + + bioperl-l@bioperl.org - General discussion +http://bioperl.org/wiki/Mailing_lists - About the mailing lists + +=head2 Support + +Please direct usage questions or support issues to the mailing list: + +L + +rather than to the module maintainer directly. Many experienced and +reponsive experts will be able look at the problem and quickly +address it. Please include a thorough description of the problem +with code and data examples if at all possible. + +=head2 Reporting Bugs + +Report bugs to the Bioperl bug tracking system to help us keep track +of the bugs and their resolution. Bug reports can be submitted via +the web: + + https://github.com/bioperl/bioperl-live/issues + +=head1 AUTHOR - Mark A. Jensen + +Email maj -at- fortinbras -dot- us + +Describe contact details here + +=head1 CONTRIBUTORS + +Dan Kortschak ( dan -dot- kortschak -at- adelaide -dot- edu -dot- au ) + +=head1 APPENDIX + +The rest of the documentation details each of the object methods. +Internal methods are usually preceded with a _ + +=cut + +# Let the code begin... + +package Bio::Tools::Run::WrapperBase; # need these methods in WrapperBase/maj +use strict; +use warnings; +no warnings qw(redefine); + +use Bio::Root::Root; +use File::Spec; +use IPC::Run; +use base qw(Bio::Root::Root Bio::ParameterBaseI); + +our $AUTOLOAD; + +=head2 new() + + Title : new + Usage : + Function: constructor for WrapperBase::CommandExts ; + correctly binds configuration variables + to the WrapperBase object + Returns : Bio::Tools::Run::WrapperBase object with command extensions + Args : + Note : this method subsumes the old _register_program_commands and + _set_program_options, leaving out the assembler-specific + parms ($qual_param and out_type()) + +=cut + +sub new { + my ($class, @args) = @_; + my $self = bless ({}, $class); + # pull in *copies* of the Config variables from the caller namespace: + my ($pkg, @goob) = caller(); + my ($commands, + $prefixes, + $params, + $switches, + $translation, + $use_dash, + $join, + $name, + $dir, + $composite_commands, + $files); + for (qw( @program_commands + %command_prefixes + @program_params + @program_switches + %param_translation + $use_dash + $join + $program_name + $program_dir + %composite_commands + %command_files ) ) { + my ($sigil, $var) = m/(.)(.*)/; + my $qualvar = "${sigil}${pkg}::${var}"; + for ($sigil) { + /\@/ && do { $qualvar = "\[$qualvar\]" }; + /\%/ && do { $qualvar = "\{$qualvar\}" }; + } + my $locvar = "\$${var}"; + $locvar =~ s/program_|command_|param_//g; + eval "$locvar = $qualvar"; + } + # set up the info registry hash + my %registry; + if ($composite_commands) { + $self->_register_composite_commands($composite_commands, + $params, + $switches, + $prefixes); + } + @registry{qw( _commands _prefixes _files + _params _switches _translation + _composite_commands )} = + ($commands, $prefixes, $files, + $params, $switches, $translation, + $composite_commands); + $self->{_options} = \%registry; + if (not defined $use_dash) { + $self->{'_options'}->{'_dash'} = 1; + } else { + $self->{'_options'}->{'_dash'} = $use_dash; + } + if (not defined $join) { + $self->{'_options'}->{'_join'} = ' '; + } else { + $self->{'_options'}->{'_join'} = $join; + } + if ($name =~ /^\*/) { + $self->is_pseudo(1); + $name =~ s/^\*//; + } + $self->program_name($name) if not defined $self->program_name(); + $self->program_dir($dir) if not defined $self->program_dir(); + $self->set_parameters(@args); + $self->parameters_changed(1); # set on instantiation, per Bio::ParameterBaseI + return $self; +} + +=head2 program_name + + Title : program_name + Usage : $factory->program_name($name) + Function: get/set the executable name + Returns: string + Args : string + +=cut + +sub program_name { + my ($self, $val) = @_; + $self->{'_program_name'} = $val if $val; + return $self->{'_program_name'}; +} + +=head2 program_dir + + Title : program_dir + Usage : $factory->program_dir($dir) + Function: get/set the program dir + Returns: string + Args : string + +=cut + +sub program_dir { + my ($self, $val) = @_; + $self->{'_program_dir'} = $val if $val; + return $self->{'_program_dir'}; +} + +=head2 _register_program_commands() + + Title : _register_program_commands + Usage : $factory->_register_program_commands( \@commands, \%prefixes ) + Function: Register the commands a program accepts (for programs that act + as frontends for a set of commands, each command having its own + set of params/switches) + Returns : true on success + Args : arrayref to a list of commands (scalar strings), + hashref to a translation table of the form + { $prefix1 => $command1, ... } [optional] + Note : To implement a program with this kind of calling structure, + include a parameter called 'command' in the + @program_params global + Note : The translation table is used to associate parameters and + switches specified in _set_program_options with the correct + program command. In the globals @program_params and + @program_switches, specify elements as 'prefix1|param' and + 'prefix1|switch', etc. + +=cut + +=head2 _set_program_options + + Title : _set_program_options + Usage : $factory->_set_program_options( \@ args ); + Function: Register the parameters and flags that an assembler takes. + Returns : 1 for success + Args : - arguments passed by the user + - parameters that the program accepts, optional (default: none) + - switches that the program accepts, optional (default: none) + - parameter translation, optional (default: no translation occurs) + - dash option for the program parameters, [1|single|double|mixed], + optional (default: yes, use single dashes only) + - join, optional (default: ' ') + +=cut + +=head2 _translate_params + + Title : _translate_params + Usage : @options = @{$assembler->_translate_params( )}; + Function: Translate the Bioperl arguments into the arguments to pass to the + program on the command line + Returns : Arrayref of arguments + Args : none + +=cut + +sub _translate_params { + my ($self) = @_; + # Get option string + my ($params, $switches, $join, $dash, $translat) = + @{$self->{_options}}{qw(_params _switches _join _dash _translation)}; + + # access the multiple dash choices of _setparams... + my @dash_args; + $dash ||= 1; # default as advertised + for ($dash) { + $_ eq '1' && do { + @dash_args = ( -dash => 1 ); + last; + }; + /^s/ && do { #single dash only + @dash_args = ( -dash => 1); + last; + }; + /^d/ && do { # double dash only + @dash_args = ( -double_dash => 1); + last; + }; + /^m/ && do { # mixed dash: one-letter opts get -, + # long opts get -- + @dash_args = ( -mixed_dash => 1); + last; + }; + do { + $self->warn( "Dash spec '$dash' not recognized; using 'single'" ); + @dash_args = ( -dash => 1 ); + }; + } + my $options = $self->_setparams( + -params => $params, + -switches => $switches, + -join => $join, + @dash_args + ); + + # Translate options + # parse more carefully - bioperl-run issue #12 + $options =~ s/^\s+//; + $options =~ s/\s+$//; + my @options; + my $in_quotes; + for (split(/(\s|$join)/, $options)) { + if (/^-/) { + push @options, $_; + } + elsif (s/^"//) { + $in_quotes=1 unless (s/["']$//); + push @options, $_; + } + elsif (s/"$//) { + $options[-1] .= $_; + $in_quotes=0; + } + else { + $in_quotes ? $options[-1] .= $_ : + push(@options, $_); + } + } + $self->throw("Unmatched quote in option value") if $in_quotes; + for (my $i = 0; $i < scalar @options; $i++) { + my ($prefix, $name) = ( $options[$i] =~ m/^(-{0,2})(.+)$/ ); + if (defined $name) { + if ($name =~ /command/i) { + $name = $options[$i+2]; # get the command + splice @options, $i, 4; + $i--; + # don't add the command if this is a pseudo-program + unshift @options, $name unless ($self->is_pseudo); # put command first + } + elsif (defined $$translat{$name}) { + $options[$i] = $prefix.$$translat{$name}; + } + } + else { + splice @options, $i, 1; + $i--; + } + } + + @options = grep (!/^\s*$/,@options); + # this is a kludge for mixed options: the reason mixed doesn't + # work right on the pass through _setparams is that the + # *aliases* and not the actual params are passed to it. + # here we just rejigger the dashes + if ($dash =~ /^m/) { + s/--([a-z0-9](?:\s|$))/-$1/gi for @options; + } + # Now arrayify the options + + return \@options; +} + +=head2 executable() + + Title : executable + Usage : + Function: find the full path to the main executable, + or to the command executable for pseudo-programs + Returns : full path, if found + Args : [optional] explicit path to the executable + (will set the appropriate command exec if + applicable) + [optional] boolean flag whether or not to warn when exe no found + Note : overrides WrapperBase.pm + +=cut + +sub executable { + my $self = shift; + my ($exe, $warn) = @_; + if ($self->is_pseudo) { + return $self->{_pathtoexe} = $self->executables($self->command,$exe); + } + + # otherwise + # setter + if (defined $exe) { + $self->throw("binary '$exe' does not exist") unless -e $exe; + $self->throw("'$exe' is not executable") unless -x $exe; + return $self->{_pathtoexe} = $exe; + } + + # getter + return $self->{_pathtoexe} if defined $self->{_pathstoexe}; + + # finder + return $self->{_pathtoexe} = $self->_find_executable($exe, $warn); +} + +=head2 executables() + + Title : executables + Usage : + Function: find the full path to a command's executable + Returns : full path (scalar string) + Args : command (scalar string), + [optional] explicit path to this command exe + [optional] boolean flag whether or not to warn when exe no found + +=cut + +sub executables { + my $self = shift; + my ($cmd, $exe, $warn) = @_; + # for now, barf if this is not a pseudo program + $self->throw("This wrapper represents a single program with commands, not multiple programs; can't use executables()") unless $self->is_pseudo; + $self->throw("Command name required at arg 1") unless defined $cmd; + $self->throw("The desired executable '$cmd' is not registered as a command") unless grep /^$cmd$/, @{$self->{_options}->{_commands}}; + + # setter + if (defined $exe) { + $self->throw("binary '$exe' does not exist") unless -e $exe; + $self->throw("'$exe' is not executable") unless -x $exe; + $self->{_pathstoexe} = {} unless defined $self->{_pathstoexe}; + return $self->{_pathstoexe}->{$cmd} = $exe; + } + + # getter + return $self->{_pathstoexe}->{$cmd} if defined $self->{_pathstoexe}->{$cmd}; + + $exe ||= $cmd; + # finder + return $self->{_pathstoexe}->{$cmd} = $self->_find_executable($exe, $warn); +} + +=head2 _find_executable() + + Title : _find_executable + Usage : my $exe_path = $fac->_find_executable($exe, $warn); + Function: find the full path to a named executable, + Returns : full path, if found + Args : name of executable to find + [optional] boolean flag whether or not to warn when exe no found + Note : differs from executable and executables in not + setting any object attributes + +=cut + +sub _find_executable { + my $self = shift; + my ($exe, $warn) = @_; + + if ($self->is_pseudo && !$exe) { + if (!$self->command) { + # this throw probably appropriate + # the rest are now warns if $warn.../maj + $self->throw( + "The ".__PACKAGE__." wrapper represents several different programs;". + "arg1 to _find_executable must be specified explicitly,". + "or the command() attribute set"); + } + else { + $exe = $self->command; + } + } + $exe ||= $self->program_path; + + my $path; + if ($self->program_dir) { + $path = File::Spec->catfile($self->program_dir, $exe); + } else { + $path = $exe; + $self->warn('Program directory not specified; use program_dir($path).') if $warn; + } + + # use provided info - we are allowed to follow symlinks, but refuse directories + map { return $path.$_ if ( -x $path.$_ && !(-d $path.$_) ) } ('', '.exe') if defined $path; + + # couldn't get path to executable from provided info, so use system path + $path = $path ? " in $path" : undef; + $self->warn("Executable $exe not found$path, trying system path...") if $warn; + if ($path = $self->io->exists_exe($exe)) { + return $path; + } else { + $self->warn("Cannot find executable for program '".($self->is_pseudo ? $self->command : $self->program_name)."'") if $warn; + return; + } +} + +=head2 _register_composite_commands() + + Title : _register_composite_commands + Usage : + Function: adds subcomand params and switches for composite commands + Returns : true on success + Args : \%composite_commands, + \@program_params, + \@program_switches + +=cut + +sub _register_composite_commands { + my $self = shift; + my ($composite_commands, $program_params, + $program_switches, $command_prefixes) = @_; + my @sub_params; + my @sub_switches; + foreach my $cmd (keys %$composite_commands) { + my $pfx = $command_prefixes->{$cmd} || $cmd; + foreach my $subcmd ( @{$$composite_commands{$cmd}} ) { + my $spfx = $command_prefixes->{$subcmd} || $subcmd; + my @sub_program_params = grep /^$spfx\|/, @$program_params; + my @sub_program_switches = grep /^$spfx\|/, @$program_switches; + for (@sub_program_params) { + m/^$spfx\|(.*)/; + push @sub_params, "$pfx\|${spfx}_".$1; + } + for (@sub_program_switches) { + m/^$spfx\|(.*)/; + push @sub_switches, "$pfx\|${spfx}_".$1; + } + } + } + push @$program_params, @sub_params; + push @$program_switches, @sub_switches; + # translations for subcmd params/switches not necessary + return 1; +} + +=head2 _create_factory_set() + + Title : _create_factory_set + Usage : @facs = $self->_create_factory_set + Function: instantiate a set of individual command factories for + a given composite command + Factories will have the correct parameter fields set for + their own subcommand + Returns : hash of factories: ( $subcmd_prefix => $subcmd_factory, ... ) + Args : none + +=cut + +sub _create_factory_set { + my $self = shift; + $self->throw('command not set') unless $self->command; + my $cmd = $self->command; + $self->throw('_create_factory_set only works on composite commands') + unless grep /^$cmd$/, keys %{$self->{_options}->{_composite_commands}}; + my %ret; + my $class = ref $self; + my $subargs_hash = $self->_collate_subcmd_args($cmd); + for (keys %$subargs_hash) { + $ret{$_} = $class->new( -command => $_, @{$$subargs_hash{$_}} ); + } + return %ret; +} + +=head2 _collate_subcmd_args() + + Title : _collate_subcmd_args + Usage : $args_hash = $self->_collate_subcmd_args + Function: collate parameters and switches into command-specific + arg lists for passing to new() + Returns : hash of named argument lists + Args : [optional] composite cmd prefix (scalar string) + [default is 'run'] + +=cut + +sub _collate_subcmd_args { + my $self = shift; + my $cmd = shift; + my %ret; + # default command is 'run' + $cmd ||= 'run'; + return unless $self->{'_options'}->{'_composite_commands'}; + return unless $self->{'_options'}->{'_composite_commands'}->{$cmd}; + my @subcmds = @{$self->{'_options'}->{'_composite_commands'}->{$cmd}}; + + my $cur_options = $self->{'_options'}; + # collate + foreach my $subcmd (@subcmds) { + # find the composite cmd form of the argument in + # the current params and switches + # e.g., map_max_mismatches + my $pfx = $self->{_options}->{_prefixes}->{$subcmd} || $subcmd; + my @params = grep /^${pfx}_/, @{$$cur_options{'_params'}}; + my @switches = grep /^${pfx}_/, @{$$cur_options{'_switches'}}; + $ret{$subcmd} = []; + # create an argument list suitable for passing to new() of + # the subcommand factory... + foreach my $opt (@params, @switches) { + my $subopt = $opt; + $subopt =~ s/^${pfx}_//; + push(@{$ret{$subcmd}}, '-'.$subopt => $self->$opt) if defined $self->$opt; + } + } + return \%ret; +} + +=head2 _run + + Title : _run + Usage : $fac->_run( @file_args ) + Function: Run a command as specified during object contruction + Returns : true on success + Args : a specification of the files to operate on according + to the filespec + +=cut + +sub _run { + my ($self, @args) = @_; + # _translate_params will provide an array of command/parameters/switches + # -- these are set at object construction + # to set up the run, need to add the files to the call + # -- provide these as arguments to this function + my $cmd = $self->command if $self->can('command'); + my $opts = $self->{_options}; + my %args; + $self->throw("No command specified for the object") unless $cmd; + # setup files necessary for this command + my $filespec = $opts->{'_files'}->{$cmd}; + my @switches; + my ($in, $out, $err); + # some applications rely completely on switches + if (defined $filespec && @$filespec) { + # parse args based on filespec + # require named args + $self->throw("Named args are required") unless !(@args % 2); + s/^-// for @args; + %args = @args; + # validate + my @req = map { + my $s = $_; + $s =~ s/^-.*\|//; + $s =~ s/^[012]?[<>]//; + $s =~ s/[^a-zA-Z0-9_]//g; + $s + } grep !/[#]/, @$filespec; + !defined($args{$_}) && $self->throw("Required filearg '$_' not specified") for @req; + # set up redirects and file switches + for (@$filespec) { + m/^1?>#?(.*)/ && do { + defined($args{$1}) && ( open $out, '>', $args{$1} or $self->throw("Could not write file '$args{$1}': $!") ); + next; + }; + m/^2>#?(.*)/ && do { + defined($args{$1}) && ( open $err, '>', $args{$1} or $self->throw("Could not write file '$args{$1}': $!") ); + next; + }; + m/^<#?(.*)/ && do { + defined($args{$1}) && ( open $in, '<', $args{$1} or $self->throw("Could not read file '$args{$1}': $!") ); + next; + }; + if (m/^-(.*)\|/) { + push @switches, $self->_dash_switch($1); + } else { + push @switches, undef; + } + } + } + my $dum; + $in || ($in = \$dum); + $out || ($out = \$self->{'stdout'}); + $err || ($err = \$self->{'stderr'}); + + # Get program executable + my $exe = $self->executable; + $self->throw("Can't find executable for '".($self->is_pseudo ? $self->command : $self->program_name)."'; can't continue") unless $exe; + + # Get command-line options + my $options = $self->_translate_params(); + # Get file specs sans redirects in correct order + my @specs = map { + my $s = $_; + $s =~ s/^-.*\|//; + $s =~ s/[^a-zA-Z0-9_]//g; + $s + } grep !/[<>]/, @$filespec; + my @files = @args{@specs}; + # expand arrayrefs + my $l = $#files; + + # Note: below code block may be brittle, see link on this: + # http://lists.open-bio.org/pipermail/bioperl-l/2010-June/033439.html + + for (0..$l) { + if (ref($files[$_]) eq 'ARRAY') { + splice(@switches, $_, 1, ($switches[$_]) x @{$files[$_]}); + splice(@files, $_, 1, @{$files[$_]}); + } + } + + @files = map { + my $s = shift @switches; + defined $_ ? ($s, $_): () + } @files; + @files = map { defined $_ ? $_ : () } @files; # squish undefs + my @ipc_args = ( $exe, @$options, @files ); + $self->{_last_execution} = join( $self->{'_options'}->{'_join'}, @ipc_args ); + eval { + IPC::Run::run(\@ipc_args, $in, $out, $err) or + die ("There was a problem running $exe : ".$$err); + }; + + if ($@) { + $self->throw("$exe call crashed: $@") unless $self->no_throw_on_crash; + return 0; + } + + return 1; +} + + + +=head2 no_throw_on_crash() + + Title : no_throw_on_crash + Usage : + Function: prevent throw on execution error + Returns : + Args : [optional] boolean + +=cut + +sub no_throw_on_crash { + my $self = shift; + return $self->{'_no_throw'} = shift if @_; + return $self->{'_no_throw'}; +} + +=head2 last_execution() + + Title : last_execution + Usage : + Function: return the last executed command with options + Returns : string of command line sent to IPC::Run + Args : + +=cut + +sub last_execution { + my $self = shift; + return $self->{'_last_execution'}; +} + +=head2 _dash_switch() + + Title : _dash_switch + Usage : $version = $fac->_dash_switch( $switch ) + Function: Returns an appropriately dashed switch for the executable + Args : A string containing a switch without dashes + Returns : string containing an appropriately dashed switch for the current executable + +=cut + +sub _dash_switch { + my ($self, $switch) = @_; + + my $dash = $self->{'_options'}->{'_dash'}; + for ($dash) { + $_ eq '1' && do { + $switch = '-'.$switch; + last; + }; + /^s/ && do { #single dash only + $switch = '-'.$switch; + last; + }; + /^d/ && do { # double dash only + $switch = '--'.$switch; + last; + }; + /^m/ && do { # mixed dash: one-letter opts get -, + $switch = '-'.$switch; + $switch =~ s/^(-[a-z0-9](?:\w+))$/-$1/i; + last; + }; + do { + $self->warn( "Dash spec '$dash' not recognized; using 'single'" ); + $switch = '-'.$switch; + }; + } + + return $switch; +} + +=head2 stdout() + + Title : stdout + Usage : $fac->stdout() + Function: store the output from STDOUT for the run, + if no file specified in _run arguments + Example : + Returns : scalar string + Args : on set, new value (a scalar or undef, optional) + +=cut + +sub stdout { + my $self = shift; + return $self->{'stdout'} = shift if @_; + return $self->{'stdout'}; +} + +=head2 stderr() + + Title : stderr + Usage : $fac->stderr() + Function: store the output from STDERR for the run, + if no file is specified in _run arguments + Example : + Returns : scalar string + Args : on set, new value (a scalar or undef, optional) + +=cut + +sub stderr { + my $self = shift; + return $self->{'stderr'} = shift if @_; + return $self->{'stderr'}; +} + +=head2 is_pseudo() + + Title : is_pseudo + Usage : $obj->is_pseudo($newval) + Function: returns true if this factory represents + a pseudo-program + Example : + Returns : value of is_pseudo (boolean) + Args : on set, new value (a scalar or undef, optional) + +=cut + +sub is_pseudo { + my $self = shift; + + return $self->{'is_pseudo'} = shift if @_; + return $self->{'is_pseudo'}; +} + +=head2 AUTOLOAD + +AUTOLOAD permits + + $class->new_yourcommand(@args); + +as an alias for + + $class->new( -command => 'yourcommand', @args ); + +=cut + +sub AUTOLOAD { + my $class = shift; + my $tok = $AUTOLOAD; + my @args = @_; + $tok =~ s/.*:://; + unless ($tok =~ /^new_/) { + $class->throw("Can't locate object method '$tok' via package '".ref($class)?ref($class):$class); + } + my ($cmd) = $tok =~ m/new_(.*)/; + return $class->new( -command => $cmd, @args ); +} + +=head1 Bio:ParameterBaseI compliance + +=head2 set_parameters() + + Title : set_parameters + Usage : $pobj->set_parameters(%params); + Function: sets the parameters listed in the hash or array + Returns : true on success + Args : [optional] hash or array of parameter/values. + +=cut + +sub set_parameters { + my ($self, @args) = @_; + + # currently stored stuff + my $opts = $self->{'_options'}; + my $params = $opts->{'_params'}; + my $switches = $opts->{'_switches'}; + my $translation = $opts->{'_translation'}; + my $use_dash = $opts->{'_dash'}; + my $join = $opts->{'_join'}; + unless (($self->can('command') && $self->command) + || (grep /command/, @args)) { + push @args, '-command', 'run'; + } + my %args = @args; + my $cmd = $args{'-command'} || $args{'command'} || ($self->can('command') && $self->command); + if ($cmd) { + my (@p,@s, %x); + $self->warn('Command present, but no commands registered') unless $self->{'_options'}->{'_commands'}; + $self->throw("Command '$cmd' not registered") unless grep /^$cmd$/, @{$self->{'_options'}->{'_commands'}}; + $cmd = $self->{_options}->{_prefixes}->{$cmd} || $cmd; + + @p = (grep(!/^.*?\|/, @$params), grep(/^${cmd}\|/, @$params)); + @s = (grep(!/^.*?\|/, @$switches), grep(/^${cmd}\|/, @$switches)); + s/.*?\|// for @p; + s/.*?\|// for @s; + @x{@p, @s} = @{$translation}{ + grep( !/^.*?\|/, @$params, @$switches), + grep(/^${cmd}\|/, @$params, @$switches) }; + $opts->{_translation} = $translation = \%x; + $opts->{_params} = $params = \@p; + $opts->{_switches} = $switches = \@s; + } + $self->_set_from_args( + \@args, + -methods => [ @$params, @$switches, 'program_name', 'program_dir', 'out_type' ], + -create => 1, + # when our parms are accessed, signal parameters are unchanged for + # future reads (until set_parameters is called) + -code => + ' my $self = shift; + $self->parameters_changed(0); + return $self->{\'_\'.$method} = shift if @_; + return $self->{\'_\'.$method};' + ); + # the question is, are previously-set parameters left alone when + # not specified in @args? + $self->parameters_changed(1); + return 1; +} + +=head2 reset_parameters() + + Title : reset_parameters + Usage : resets values + Function: resets parameters to either undef or value in passed hash + Returns : none + Args : [optional] hash of parameter-value pairs + +=cut + +sub reset_parameters { + my ($self, @args) = @_; + + my @reset_args; + # currently stored stuff + my $opts = $self->{'_options'}; + my $params = $opts->{'_params'}; + my $switches = $opts->{'_switches'}; + my $translation = $opts->{'_translation'}; + my $qual_param = $opts->{'_qual_param'}; + my $use_dash = $opts->{'_dash'}; + my $join = $opts->{'_join'}; + + # handle command name + my %args = @args; + my $cmd = $args{'-command'} || $args{'command'} || $self->command; + $args{'command'} = $cmd; + delete $args{'-command'}; + @args = %args; + # don't like this, b/c _set_program_args will create a bunch of + # accessors with undef values, but oh well for now /maj + + for my $p (@$params) { + push(@reset_args, $p => undef) unless grep /^[-]?$p$/, @args; + } + for my $s (@$switches) { + push(@reset_args, $s => undef) unless grep /^[-]?$s$/, @args; + } + push @args, @reset_args; + $self->set_parameters(@args); + $self->parameters_changed(1); +} + +=head2 parameters_changed() + + Title : parameters_changed + Usage : if ($pobj->parameters_changed) {...} + Function: Returns boolean true (1) if parameters have changed + Returns : Boolean (0 or 1) + Args : [optional] Boolean + +=cut + +sub parameters_changed { + my $self = shift; + return $self->{'_parameters_changed'} = shift if @_; + return $self->{'_parameters_changed'}; +} + +=head2 available_parameters() + + Title : available_parameters + Usage : @params = $pobj->available_parameters() + Function: Returns a list of the available parameters + Returns : Array of parameters + Args : 'params' for settable program parameters + 'switches' for boolean program switches + default: all + +=cut + +sub available_parameters { + my $self = shift; + my $subset = shift; + my $opts = $self->{'_options'}; + my @ret; + for ($subset) { + (!defined || /^a/) && do { + @ret = (@{$opts->{'_params'}}, @{$opts->{'_switches'}}); + last; + }; + m/^p/i && do { + @ret = @{$opts->{'_params'}}; + last; + }; + m/^s/i && do { + @ret = @{$opts->{'_switches'}}; + last; + }; + m/^c/i && do { + @ret = @{$opts->{'_commands'}}; + last; + }; + m/^f/i && do { # get file spec + return @{$opts->{'_files'}->{$self->command}}; + }; + do { #fail + $self->throw("available_parameters: unrecognized subset"); + }; + } + return @ret; +} + +sub available_commands { shift->available_parameters('commands') } +sub filespec { shift->available_parameters('filespec') } + +=head2 get_parameters() + + Title : get_parameters + Usage : %params = $pobj->get_parameters; + Function: Returns list of key-value pairs of parameter => value + Returns : List of key-value pairs + Args : [optional] A string is allowed if subsets are wanted or (if a + parameter subset is default) 'all' to return all parameters + +=cut + +sub get_parameters { + my $self = shift; + my $subset = shift; + $subset ||= 'all'; + my @ret; + my $opts = $self->{'_options'}; + for ($subset) { + m/^p/i && do { #params only + for (@{$opts->{'_params'}}) { + push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_; + } + last; + }; + m/^s/i && do { #switches only + for (@{$opts->{'_switches'}}) { + push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_; + } + last; + }; + m/^a/i && do { # all + for ((@{$opts->{'_params'}},@{$opts->{'_switches'}})) { + push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_; + } + last; + }; + do { + $self->throw("get_parameters: unrecognized subset"); + }; + } + return @ret; +} + +1; diff --git a/Changes b/Changes index a40522f62..250eb86e4 100644 --- a/Changes +++ b/Changes @@ -33,6 +33,14 @@ be removed. 1.7.3 - "To Be Named" + * The following modules have been moved here from the BioPerl-Run + distribution: + + Bio::Tools::Run::Analysis + Bio::Tools::Run::AnalysisFactory + Bio::Tools::Run::WrapperBase + Bio::Tools::Run::WrapperBase::CommandExts + [Code changes] * The deobfuscator has been removed. -- 2.11.4.GIT