2 # BioPerl module for Bio::Seq::SeqBuilder
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Hilmar Lapp <hlapp at gmx.net>
8 # Copyright Hilmar Lapp
10 # You may distribute this module under the same terms as perl itself
13 # (c) Hilmar Lapp, hlapp at gmx.net, 2002.
14 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
16 # You may distribute this module under the same terms as perl itself.
17 # Refer to the Perl Artistic License (see the license accompanying this
18 # software package, or see http://www.perl.com/language/misc/Artistic.html)
19 # for the terms under which you may use, modify, and redistribute this module.
21 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
22 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
23 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26 # POD documentation - main docs before the code
30 Bio::Seq::SeqBuilder - Configurable object builder for sequence stream parsers
36 # usually you won't instantiate this yourself - a SeqIO object -
37 # you will have one already
38 my $seqin = Bio::SeqIO->new(-fh => \*STDIN, -format => "genbank");
39 my $builder = $seqin->sequence_builder();
41 # if you need only sequence, id, and description (e.g. for
42 # conversion to FASTA format):
43 $builder->want_none();
44 $builder->add_wanted_slot('display_id','desc','seq');
46 # if you want everything except the sequence and features
47 $builder->want_all(1); # this is the default if it's untouched
48 $builder->add_unwanted_slot('seq','features');
50 # if you want only human sequences shorter than 5kb and skip all
52 $builder->add_object_condition(sub {
54 return 0 if $h->{'-length'} > 5000;
55 return 0 if exists($h->{'-species'}) &&
56 ($h->{'-species'}->binomial() ne "Homo sapiens");
60 # when you are finished with configuring the builder, just use
61 # the SeqIO API as you would normally
62 while(my $seq = $seqin->next_seq()) {
68 This is an implementation of L<Bio::Factory::ObjectBuilderI> used by
69 parsers of rich sequence streams. It provides for a relatively
70 easy-to-use configurator of the parsing flow.
72 Configuring the parsing process may be for you if you need much less
73 information, or much less sequence, than the stream actually
74 contains. Configuration can in both cases speed up the parsing time
75 considerably, because unwanted sections or the rest of unwanted
76 sequences are skipped over by the parser. This configuration could
77 also conserve memory if you're running out of available RAM.
79 See the methods of the class-specific implementation section for
80 further documentation of what can be configured.
86 User feedback is an integral part of the evolution of this and other
87 Bioperl modules. Send your comments and suggestions preferably to
88 the Bioperl mailing list. Your participation is much appreciated.
90 bioperl-l@bioperl.org - General discussion
91 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
95 Please direct usage questions or support issues to the mailing list:
97 I<bioperl-l@bioperl.org>
99 rather than to the module maintainer directly. Many experienced and
100 reponsive experts will be able look at the problem and quickly
101 address it. Please include a thorough description of the problem
102 with code and data examples if at all possible.
104 =head2 Reporting Bugs
106 Report bugs to the Bioperl bug tracking system to help us keep track
107 of the bugs and their resolution. Bug reports can be submitted via
110 https://github.com/bioperl/bioperl-live/issues
112 =head1 AUTHOR - Hilmar Lapp
114 Email hlapp at gmx.net
118 The rest of the documentation details each of the object methods.
119 Internal methods are usually preceded with a _
124 # Let the code begin...
127 package Bio
::Seq
::SeqBuilder
;
130 # Object preamble - inherits from Bio::Root::Root
133 use base
qw(Bio::Root::Root Bio::Factory::ObjectBuilderI);
135 my %slot_param_map = ("add_SeqFeature" => "features",
137 my %param_slot_map = ("features" => "add_SeqFeature",
143 Usage : my $obj = Bio::Seq::SeqBuilder->new();
144 Function: Builds a new Bio::Seq::SeqBuilder object
145 Returns : an instance of Bio::Seq::SeqBuilder
151 my($class,@args) = @_;
153 my $self = $class->SUPER::new
(@args);
155 $self->{'wanted_slots'} = [];
156 $self->{'unwanted_slots'} = [];
157 $self->{'object_conds'} = [];
158 $self->{'_objhash'} = {};
164 =head1 Methods for implementing L<Bio::Factory::ObjectBuilderI>
172 Function: Whether or not the object builder wants to populate the
173 specified slot of the object to be built.
175 The slot can be specified either as the name of the
176 respective method, or the initialization parameter that
177 would be otherwise passed to new() of the object to be
180 Note that usually only the parser will call this
181 method. Use add_wanted_slots and add_unwanted_slots for
185 Returns : TRUE if the object builder wants to populate the slot, and
187 Args : the name of the slot (a string)
193 my ($self,$slot) = @_;
196 $slot = substr($slot,1) if substr($slot,0,1) eq '-';
197 if($self->want_all()) {
198 foreach ($self->get_unwanted_slots()) {
199 # this always overrides in want-all mode
200 return 0 if($slot eq $_);
202 if(! exists($self->{'_objskel'})) {
203 $self->{'_objskel'} = $self->sequence_factory->create_object();
205 if(exists($param_slot_map{$slot})) {
206 $ok = $self->{'_objskel'}->can($param_slot_map{$slot});
208 $ok = $self->{'_objskel'}->can($slot);
211 # even if the object 'cannot' do this slot, it might have been
212 # added to the list of wanted slot, so carry on
214 foreach ($self->get_wanted_slots()) {
223 =head2 add_slot_value
225 Title : add_slot_value
227 Function: Adds one or more values to the specified slot of the object
230 Naming the slot is the same as for want_slot().
232 The object builder may further filter the content to be
233 set, or even completely ignore the request.
235 If this method reports failure, the caller should not add
236 more values to the same slot. In addition, the caller may
237 find it appropriate to abandon the object being built
240 This implementation will allow the caller to overwrite the
241 return value from want_slot(), because the slot is not
242 checked against want_slot().
244 Note that usually only the parser will call this method,
245 but you may call it from anywhere if you know what you are
246 doing. A derived class may be used to further manipulate
247 the value to be added.
250 Returns : TRUE on success, and FALSE otherwise
251 Args : the name of the slot (a string)
252 parameters determining the value to be set
256 alternatively, a list of slotname/value pairs in the style
257 of named parameters as they would be passed to new(), where
258 each element at an even index is the parameter (slot) name
259 starting with a dash, and each element at an odd index is
260 the value of the preceding name.
265 my ($self,$slot,@args) = @_;
267 my $h = $self->{'_objhash'};
269 # multiple named parameter variant of calling?
270 if((@args > 1) && (@args % 2) && (substr($slot,0,1) eq '-')) {
271 unshift(@args, $slot);
273 my $key = shift(@args);
274 $h->{$key} = shift(@args);
277 if($slot eq 'add_SeqFeature') {
278 $slot = '-'.$slot_param_map{$slot};
279 $h->{$slot} = [] unless $h->{$slot};
280 push(@
{$h->{$slot}}, @args);
282 $slot = '-'.$slot unless substr($slot,0,1) eq '-';
283 $h->{$slot} = $args[0];
293 Function: Whether or not the object builder is still interested in
294 continuing with the object being built.
296 If this method returns FALSE, the caller should not add any
297 more values to slots, or otherwise risks that the builder
298 throws an exception. In addition, make_object() is likely
299 to return undef after this method returned FALSE.
301 Note that usually only the parser will call this
302 method. Use add_object_condition for configuration.
305 Returns : TRUE if the object builder wants to continue building
306 the present object, and FALSE otherwise.
315 foreach my $cond ($self->get_object_conditions()) {
316 $ok = &$cond($self->{'_objhash'});
319 delete $self->{'_objhash'} unless $ok;
327 Function: Get the built object.
329 This method is allowed to return undef if no value has ever
330 been added since the last call to make_object(), or if
331 want_object() returned FALSE (or would have returned FALSE)
332 before calling this method.
334 For an implementation that allows consecutive building of
335 objects, a caller must call this method once, and only
336 once, between subsequent objects to be built. I.e., a call
337 to make_object implies 'end_object.'
340 Returns : the object that was built
349 if(exists($self->{'_objhash'}) && %{$self->{'_objhash'}}) {
350 $obj = $self->sequence_factory->create_object(%{$self->{'_objhash'}});
352 $self->{'_objhash'} = {}; # reset
356 =head1 Implementation specific methods
358 These methods allow one to conveniently configure this sequence object
359 builder as to which slots are desired, and under which circumstances a
360 sequence object should be abandoned altogether. The default mode is
361 want_all(1), which means the builder will report all slots as wanted
362 that the object created by the sequence factory supports.
364 You can add specific slots you want through add_wanted_slots(). In
365 most cases, you will want to call want_none() before in order to relax
366 zero acceptance through a list of wanted slots.
368 Alternatively, you can add specific unwanted slots through
369 add_unwanted_slots(). In this case, you will usually want to call
370 want_all(1) before (which is the default if you never touched the
371 builder) to restrict unrestricted acceptance.
373 I.e., want_all(1) means want all slots except for the unwanted, and
374 want_none() means only those explicitly wanted.
376 If a slot is in both the unwanted and the wanted list, the following
377 rules hold. In want-all mode, the unwanted list overrules. In
378 want-none mode, the wanted list overrides the unwanted list. If this
379 is confusing to you, just try to avoid having slots at the same time
380 in the wanted and the unwanted lists.
384 =head2 get_wanted_slots
386 Title : get_wanted_slots
387 Usage : $obj->get_wanted_slots($newval)
388 Function: Get the list of wanted slots
390 Returns : a list of strings
396 sub get_wanted_slots
{
399 return @
{$self->{'wanted_slots'}};
402 =head2 add_wanted_slot
404 Title : add_wanted_slot
406 Function: Adds the specified slots to the list of wanted slots.
409 Args : an array of slot names (strings)
414 my ($self,@slots) = @_;
416 my $myslots = $self->{'wanted_slots'};
417 foreach my $slot (@slots) {
418 if(! grep { $slot eq $_; } @
$myslots) {
419 push(@
$myslots, $slot);
425 =head2 remove_wanted_slots
427 Title : remove_wanted_slots
429 Function: Removes all wanted slots added previously through
432 Returns : the previous list of wanted slot names
437 sub remove_wanted_slots
{
439 my @slots = $self->get_wanted_slots();
440 $self->{'wanted_slots'} = [];
444 =head2 get_unwanted_slots
446 Title : get_unwanted_slots
447 Usage : $obj->get_unwanted_slots($newval)
448 Function: Get the list of unwanted slots.
450 Returns : a list of strings
455 sub get_unwanted_slots
{
458 return @
{$self->{'unwanted_slots'}};
461 =head2 add_unwanted_slot
463 Title : add_unwanted_slot
465 Function: Adds the specified slots to the list of unwanted slots.
468 Args : an array of slot names (strings)
472 sub add_unwanted_slot
{
473 my ($self,@slots) = @_;
475 my $myslots = $self->{'unwanted_slots'};
476 foreach my $slot (@slots) {
477 if(! grep { $slot eq $_; } @
$myslots) {
478 push(@
$myslots, $slot);
484 =head2 remove_unwanted_slots
486 Title : remove_unwanted_slots
488 Function: Removes the list of unwanted slots added previously through
489 add_unwanted_slots().
491 Returns : the previous list of unwanted slot names
496 sub remove_unwanted_slots
{
498 my @slots = $self->get_unwanted_slots();
499 $self->{'unwanted_slots'} = [];
507 Function: Disables all slots. After calling this method, want_slot()
508 will return FALSE regardless of slot name.
510 This is different from removed_wanted_slots() in that it
511 also sets want_all() to FALSE. Note that it also resets the
512 list of unwanted slots in order to avoid slots being in
525 $self->remove_wanted_slots();
526 $self->remove_unwanted_slots();
533 Usage : $obj->want_all($newval)
534 Function: Whether or not this sequence object builder wants to
535 populate all slots that the object has. Whether an object
536 supports a slot is generally determined by what can()
537 returns. You can add additional 'virtual' slots by calling
540 This will be ON by default. Call $obj->want_none() to
544 Returns : TRUE if this builder wants to populate all slots, and
546 Args : on set, new value (a scalar or undef, optional)
553 return $self->{'want_all'} = shift if @_;
554 return $self->{'want_all'};
557 =head2 get_object_conditions
559 Title : get_object_conditions
561 Function: Get the list of conditions an object must meet in order to
562 be 'wanted.' See want_object() for where this is used.
564 Conditions in this implementation are closures (anonymous
565 functions) which are passed one parameter, a hash reference
566 the keys of which are equal to initialization
567 parameters. The closure must return TRUE to make the object
570 Conditions will be implicitly ANDed.
573 Returns : a list of closures
578 sub get_object_conditions
{
581 return @
{$self->{'object_conds'}};
584 =head2 add_object_condition
586 Title : add_object_condition
588 Function: Adds a condition an object must meet in order to be 'wanted.'
589 See want_object() for where this is used.
591 Conditions in this implementation must be closures
592 (anonymous functions). These will be passed one parameter,
593 which is a hash reference with the sequence object
594 initialization parameters being the keys.
596 Conditions are implicitly ANDed. If you want other
597 operators, perform those tests inside of one closure
598 instead of multiple. This will also be more efficient.
602 Args : the list of conditions
606 sub add_object_condition
{
607 my ($self,@conds) = @_;
609 if(grep { ref($_) ne 'CODE'; } @conds) {
610 $self->throw("conditions against which to validate an object ".
611 "must be anonymous code blocks");
613 push(@
{$self->{'object_conds'}}, @conds);
617 =head2 remove_object_conditions
619 Title : remove_object_conditions
621 Function: Removes the conditions an object must meet in order to be
624 Returns : The list of previously set conditions (an array of closures)
629 sub remove_object_conditions
{
631 my @conds = $self->get_object_conditions();
632 $self->{'object_conds'} = [];
636 =head1 Methods to control what type of object is built
640 =head2 sequence_factory
642 Title : sequence_factory
643 Usage : $obj->sequence_factory($newval)
644 Function: Get/set the sequence factory to be used by this object
647 Returns : the Bio::Factory::SequenceFactoryI implementing object to use
648 Args : on set, new value (a Bio::Factory::SequenceFactoryI
649 implementing object or undef, optional)
653 sub sequence_factory
{
657 delete $self->{'_objskel'};
658 return $self->{'sequence_factory'} = shift;
660 return $self->{'sequence_factory'};