Use /usr/bin/perl instead of env even on examples
[bioperl-live.git] / lib / Bio / Seq / SeqBuilder.pm
blob9f201ad36531dec9ab42edbf54fc07a17dc2b7ac
2 # BioPerl module for Bio::Seq::SeqBuilder
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Hilmar Lapp <hlapp at gmx.net>
8 # Copyright Hilmar Lapp
10 # You may distribute this module under the same terms as perl itself
13 # (c) Hilmar Lapp, hlapp at gmx.net, 2002.
14 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
16 # You may distribute this module under the same terms as perl itself.
17 # Refer to the Perl Artistic License (see the license accompanying this
18 # software package, or see http://www.perl.com/language/misc/Artistic.html)
19 # for the terms under which you may use, modify, and redistribute this module.
21 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
22 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
23 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
26 # POD documentation - main docs before the code
28 =head1 NAME
30 Bio::Seq::SeqBuilder - Configurable object builder for sequence stream parsers
32 =head1 SYNOPSIS
34 use Bio::SeqIO;
36 # usually you won't instantiate this yourself - a SeqIO object -
37 # you will have one already
38 my $seqin = Bio::SeqIO->new(-fh => \*STDIN, -format => "genbank");
39 my $builder = $seqin->sequence_builder();
41 # if you need only sequence, id, and description (e.g. for
42 # conversion to FASTA format):
43 $builder->want_none();
44 $builder->add_wanted_slot('display_id','desc','seq');
46 # if you want everything except the sequence and features
47 $builder->want_all(1); # this is the default if it's untouched
48 $builder->add_unwanted_slot('seq','features');
50 # if you want only human sequences shorter than 5kb and skip all
51 # others
52 $builder->add_object_condition(sub {
53 my $h = shift;
54 return 0 if $h->{'-length'} > 5000;
55 return 0 if exists($h->{'-species'}) &&
56 ($h->{'-species'}->binomial() ne "Homo sapiens");
57 return 1;
58 });
60 # when you are finished with configuring the builder, just use
61 # the SeqIO API as you would normally
62 while(my $seq = $seqin->next_seq()) {
63 # do something
66 =head1 DESCRIPTION
68 This is an implementation of L<Bio::Factory::ObjectBuilderI> used by
69 parsers of rich sequence streams. It provides for a relatively
70 easy-to-use configurator of the parsing flow.
72 Configuring the parsing process may be for you if you need much less
73 information, or much less sequence, than the stream actually
74 contains. Configuration can in both cases speed up the parsing time
75 considerably, because unwanted sections or the rest of unwanted
76 sequences are skipped over by the parser. This configuration could
77 also conserve memory if you're running out of available RAM.
79 See the methods of the class-specific implementation section for
80 further documentation of what can be configured.
82 =head1 FEEDBACK
84 =head2 Mailing Lists
86 User feedback is an integral part of the evolution of this and other
87 Bioperl modules. Send your comments and suggestions preferably to
88 the Bioperl mailing list. Your participation is much appreciated.
90 bioperl-l@bioperl.org - General discussion
91 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
93 =head2 Support
95 Please direct usage questions or support issues to the mailing list:
97 I<bioperl-l@bioperl.org>
99 rather than to the module maintainer directly. Many experienced and
100 reponsive experts will be able look at the problem and quickly
101 address it. Please include a thorough description of the problem
102 with code and data examples if at all possible.
104 =head2 Reporting Bugs
106 Report bugs to the Bioperl bug tracking system to help us keep track
107 of the bugs and their resolution. Bug reports can be submitted via
108 the web:
110 https://github.com/bioperl/bioperl-live/issues
112 =head1 AUTHOR - Hilmar Lapp
114 Email hlapp at gmx.net
116 =head1 APPENDIX
118 The rest of the documentation details each of the object methods.
119 Internal methods are usually preceded with a _
121 =cut
124 # Let the code begin...
127 package Bio::Seq::SeqBuilder;
128 use strict;
130 # Object preamble - inherits from Bio::Root::Root
133 use base qw(Bio::Root::Root Bio::Factory::ObjectBuilderI);
135 my %slot_param_map = ("add_SeqFeature" => "features",
137 my %param_slot_map = ("features" => "add_SeqFeature",
140 =head2 new
142 Title : new
143 Usage : my $obj = Bio::Seq::SeqBuilder->new();
144 Function: Builds a new Bio::Seq::SeqBuilder object
145 Returns : an instance of Bio::Seq::SeqBuilder
146 Args :
148 =cut
150 sub new {
151 my($class,@args) = @_;
153 my $self = $class->SUPER::new(@args);
155 $self->{'wanted_slots'} = [];
156 $self->{'unwanted_slots'} = [];
157 $self->{'object_conds'} = [];
158 $self->{'_objhash'} = {};
159 $self->want_all(1);
161 return $self;
164 =head1 Methods for implementing L<Bio::Factory::ObjectBuilderI>
166 =cut
168 =head2 want_slot
170 Title : want_slot
171 Usage :
172 Function: Whether or not the object builder wants to populate the
173 specified slot of the object to be built.
175 The slot can be specified either as the name of the
176 respective method, or the initialization parameter that
177 would be otherwise passed to new() of the object to be
178 built.
180 Note that usually only the parser will call this
181 method. Use add_wanted_slots and add_unwanted_slots for
182 configuration.
184 Example :
185 Returns : TRUE if the object builder wants to populate the slot, and
186 FALSE otherwise.
187 Args : the name of the slot (a string)
190 =cut
192 sub want_slot{
193 my ($self,$slot) = @_;
194 my $ok = 0;
196 $slot = substr($slot,1) if substr($slot,0,1) eq '-';
197 if($self->want_all()) {
198 foreach ($self->get_unwanted_slots()) {
199 # this always overrides in want-all mode
200 return 0 if($slot eq $_);
202 if(! exists($self->{'_objskel'})) {
203 $self->{'_objskel'} = $self->sequence_factory->create_object();
205 if(exists($param_slot_map{$slot})) {
206 $ok = $self->{'_objskel'}->can($param_slot_map{$slot});
207 } else {
208 $ok = $self->{'_objskel'}->can($slot);
210 return $ok if $ok;
211 # even if the object 'cannot' do this slot, it might have been
212 # added to the list of wanted slot, so carry on
214 foreach ($self->get_wanted_slots()) {
215 if($slot eq $_) {
216 $ok = 1;
217 last;
220 return $ok;
223 =head2 add_slot_value
225 Title : add_slot_value
226 Usage :
227 Function: Adds one or more values to the specified slot of the object
228 to be built.
230 Naming the slot is the same as for want_slot().
232 The object builder may further filter the content to be
233 set, or even completely ignore the request.
235 If this method reports failure, the caller should not add
236 more values to the same slot. In addition, the caller may
237 find it appropriate to abandon the object being built
238 altogether.
240 This implementation will allow the caller to overwrite the
241 return value from want_slot(), because the slot is not
242 checked against want_slot().
244 Note that usually only the parser will call this method,
245 but you may call it from anywhere if you know what you are
246 doing. A derived class may be used to further manipulate
247 the value to be added.
249 Example :
250 Returns : TRUE on success, and FALSE otherwise
251 Args : the name of the slot (a string)
252 parameters determining the value to be set
256 alternatively, a list of slotname/value pairs in the style
257 of named parameters as they would be passed to new(), where
258 each element at an even index is the parameter (slot) name
259 starting with a dash, and each element at an odd index is
260 the value of the preceding name.
262 =cut
264 sub add_slot_value{
265 my ($self,$slot,@args) = @_;
267 my $h = $self->{'_objhash'};
268 return unless $h;
269 # multiple named parameter variant of calling?
270 if((@args > 1) && (@args % 2) && (substr($slot,0,1) eq '-')) {
271 unshift(@args, $slot);
272 while(@args) {
273 my $key = shift(@args);
274 $h->{$key} = shift(@args);
276 } else {
277 if($slot eq 'add_SeqFeature') {
278 $slot = '-'.$slot_param_map{$slot};
279 $h->{$slot} = [] unless $h->{$slot};
280 push(@{$h->{$slot}}, @args);
281 } else {
282 $slot = '-'.$slot unless substr($slot,0,1) eq '-';
283 $h->{$slot} = $args[0];
286 return 1;
289 =head2 want_object
291 Title : want_object
292 Usage :
293 Function: Whether or not the object builder is still interested in
294 continuing with the object being built.
296 If this method returns FALSE, the caller should not add any
297 more values to slots, or otherwise risks that the builder
298 throws an exception. In addition, make_object() is likely
299 to return undef after this method returned FALSE.
301 Note that usually only the parser will call this
302 method. Use add_object_condition for configuration.
304 Example :
305 Returns : TRUE if the object builder wants to continue building
306 the present object, and FALSE otherwise.
307 Args : none
309 =cut
311 sub want_object{
312 my $self = shift;
314 my $ok = 1;
315 foreach my $cond ($self->get_object_conditions()) {
316 $ok = &$cond($self->{'_objhash'});
317 last unless $ok;
319 delete $self->{'_objhash'} unless $ok;
320 return $ok;
323 =head2 make_object
325 Title : make_object
326 Usage :
327 Function: Get the built object.
329 This method is allowed to return undef if no value has ever
330 been added since the last call to make_object(), or if
331 want_object() returned FALSE (or would have returned FALSE)
332 before calling this method.
334 For an implementation that allows consecutive building of
335 objects, a caller must call this method once, and only
336 once, between subsequent objects to be built. I.e., a call
337 to make_object implies 'end_object.'
339 Example :
340 Returns : the object that was built
341 Args : none
343 =cut
345 sub make_object{
346 my $self = shift;
348 my $obj;
349 if(exists($self->{'_objhash'}) && %{$self->{'_objhash'}}) {
350 $obj = $self->sequence_factory->create_object(%{$self->{'_objhash'}});
352 $self->{'_objhash'} = {}; # reset
353 return $obj;
356 =head1 Implementation specific methods
358 These methods allow one to conveniently configure this sequence object
359 builder as to which slots are desired, and under which circumstances a
360 sequence object should be abandoned altogether. The default mode is
361 want_all(1), which means the builder will report all slots as wanted
362 that the object created by the sequence factory supports.
364 You can add specific slots you want through add_wanted_slots(). In
365 most cases, you will want to call want_none() before in order to relax
366 zero acceptance through a list of wanted slots.
368 Alternatively, you can add specific unwanted slots through
369 add_unwanted_slots(). In this case, you will usually want to call
370 want_all(1) before (which is the default if you never touched the
371 builder) to restrict unrestricted acceptance.
373 I.e., want_all(1) means want all slots except for the unwanted, and
374 want_none() means only those explicitly wanted.
376 If a slot is in both the unwanted and the wanted list, the following
377 rules hold. In want-all mode, the unwanted list overrules. In
378 want-none mode, the wanted list overrides the unwanted list. If this
379 is confusing to you, just try to avoid having slots at the same time
380 in the wanted and the unwanted lists.
382 =cut
384 =head2 get_wanted_slots
386 Title : get_wanted_slots
387 Usage : $obj->get_wanted_slots($newval)
388 Function: Get the list of wanted slots
389 Example :
390 Returns : a list of strings
391 Args :
394 =cut
396 sub get_wanted_slots{
397 my $self = shift;
399 return @{$self->{'wanted_slots'}};
402 =head2 add_wanted_slot
404 Title : add_wanted_slot
405 Usage :
406 Function: Adds the specified slots to the list of wanted slots.
407 Example :
408 Returns : TRUE
409 Args : an array of slot names (strings)
411 =cut
413 sub add_wanted_slot{
414 my ($self,@slots) = @_;
416 my $myslots = $self->{'wanted_slots'};
417 foreach my $slot (@slots) {
418 if(! grep { $slot eq $_; } @$myslots) {
419 push(@$myslots, $slot);
422 return 1;
425 =head2 remove_wanted_slots
427 Title : remove_wanted_slots
428 Usage :
429 Function: Removes all wanted slots added previously through
430 add_wanted_slots().
431 Example :
432 Returns : the previous list of wanted slot names
433 Args : none
435 =cut
437 sub remove_wanted_slots{
438 my $self = shift;
439 my @slots = $self->get_wanted_slots();
440 $self->{'wanted_slots'} = [];
441 return @slots;
444 =head2 get_unwanted_slots
446 Title : get_unwanted_slots
447 Usage : $obj->get_unwanted_slots($newval)
448 Function: Get the list of unwanted slots.
449 Example :
450 Returns : a list of strings
451 Args : none
453 =cut
455 sub get_unwanted_slots{
456 my $self = shift;
458 return @{$self->{'unwanted_slots'}};
461 =head2 add_unwanted_slot
463 Title : add_unwanted_slot
464 Usage :
465 Function: Adds the specified slots to the list of unwanted slots.
466 Example :
467 Returns : TRUE
468 Args : an array of slot names (strings)
470 =cut
472 sub add_unwanted_slot{
473 my ($self,@slots) = @_;
475 my $myslots = $self->{'unwanted_slots'};
476 foreach my $slot (@slots) {
477 if(! grep { $slot eq $_; } @$myslots) {
478 push(@$myslots, $slot);
481 return 1;
484 =head2 remove_unwanted_slots
486 Title : remove_unwanted_slots
487 Usage :
488 Function: Removes the list of unwanted slots added previously through
489 add_unwanted_slots().
490 Example :
491 Returns : the previous list of unwanted slot names
492 Args : none
494 =cut
496 sub remove_unwanted_slots{
497 my $self = shift;
498 my @slots = $self->get_unwanted_slots();
499 $self->{'unwanted_slots'} = [];
500 return @slots;
503 =head2 want_none
505 Title : want_none
506 Usage :
507 Function: Disables all slots. After calling this method, want_slot()
508 will return FALSE regardless of slot name.
510 This is different from removed_wanted_slots() in that it
511 also sets want_all() to FALSE. Note that it also resets the
512 list of unwanted slots in order to avoid slots being in
513 both lists.
515 Example :
516 Returns : TRUE
517 Args : none
519 =cut
521 sub want_none{
522 my $self = shift;
524 $self->want_all(0);
525 $self->remove_wanted_slots();
526 $self->remove_unwanted_slots();
527 return 1;
530 =head2 want_all
532 Title : want_all
533 Usage : $obj->want_all($newval)
534 Function: Whether or not this sequence object builder wants to
535 populate all slots that the object has. Whether an object
536 supports a slot is generally determined by what can()
537 returns. You can add additional 'virtual' slots by calling
538 add_wanted_slot.
540 This will be ON by default. Call $obj->want_none() to
541 disable all slots.
543 Example :
544 Returns : TRUE if this builder wants to populate all slots, and
545 FALSE otherwise.
546 Args : on set, new value (a scalar or undef, optional)
548 =cut
550 sub want_all{
551 my $self = shift;
553 return $self->{'want_all'} = shift if @_;
554 return $self->{'want_all'};
557 =head2 get_object_conditions
559 Title : get_object_conditions
560 Usage :
561 Function: Get the list of conditions an object must meet in order to
562 be 'wanted.' See want_object() for where this is used.
564 Conditions in this implementation are closures (anonymous
565 functions) which are passed one parameter, a hash reference
566 the keys of which are equal to initialization
567 parameters. The closure must return TRUE to make the object
568 'wanted.'
570 Conditions will be implicitly ANDed.
572 Example :
573 Returns : a list of closures
574 Args : none
576 =cut
578 sub get_object_conditions{
579 my $self = shift;
581 return @{$self->{'object_conds'}};
584 =head2 add_object_condition
586 Title : add_object_condition
587 Usage :
588 Function: Adds a condition an object must meet in order to be 'wanted.'
589 See want_object() for where this is used.
591 Conditions in this implementation must be closures
592 (anonymous functions). These will be passed one parameter,
593 which is a hash reference with the sequence object
594 initialization parameters being the keys.
596 Conditions are implicitly ANDed. If you want other
597 operators, perform those tests inside of one closure
598 instead of multiple. This will also be more efficient.
600 Example :
601 Returns : TRUE
602 Args : the list of conditions
604 =cut
606 sub add_object_condition{
607 my ($self,@conds) = @_;
609 if(grep { ref($_) ne 'CODE'; } @conds) {
610 $self->throw("conditions against which to validate an object ".
611 "must be anonymous code blocks");
613 push(@{$self->{'object_conds'}}, @conds);
614 return 1;
617 =head2 remove_object_conditions
619 Title : remove_object_conditions
620 Usage :
621 Function: Removes the conditions an object must meet in order to be
622 'wanted.'
623 Example :
624 Returns : The list of previously set conditions (an array of closures)
625 Args : none
627 =cut
629 sub remove_object_conditions{
630 my $self = shift;
631 my @conds = $self->get_object_conditions();
632 $self->{'object_conds'} = [];
633 return @conds;
636 =head1 Methods to control what type of object is built
638 =cut
640 =head2 sequence_factory
642 Title : sequence_factory
643 Usage : $obj->sequence_factory($newval)
644 Function: Get/set the sequence factory to be used by this object
645 builder.
646 Example :
647 Returns : the Bio::Factory::SequenceFactoryI implementing object to use
648 Args : on set, new value (a Bio::Factory::SequenceFactoryI
649 implementing object or undef, optional)
651 =cut
653 sub sequence_factory{
654 my $self = shift;
656 if(@_) {
657 delete $self->{'_objskel'};
658 return $self->{'sequence_factory'} = shift;
660 return $self->{'sequence_factory'};