Bio::DB::Universal: move into its own distribution
[bioperl-live.git] / Bio / Tools / AmpliconSearch.pm
bloba290fbbd329dcf9718680dbf594bc31f5b1f94fa
1 # BioPerl module for Bio::Tools::AmpliconSearch
3 # Copyright Florent Angly
5 # You may distribute this module under the same terms as perl itself
8 package Bio::Tools::AmpliconSearch;
10 use strict;
11 use warnings;
12 use Bio::Tools::IUPAC;
13 use Bio::SeqFeature::Amplicon;
14 use Bio::Tools::SeqPattern;
15 # we require Bio::SeqIO
16 # and Bio::SeqFeature::Primer
18 use base qw(Bio::Root::Root);
20 my $template_str;
23 =head1 NAME
25 Bio::Tools::AmpliconSearch - Find amplicons in a template using degenerate PCR primers
27 =head1 SYNOPSIS
29 use Bio::PrimarySeq;
30 use Bio::Tools::AmpliconSearch;
32 my $template = Bio::PrimarySeq->new(
33 -seq => 'aaaaaCCCCaaaaaaaaaaTTTTTTaaaaaCCACaaaaaTTTTTTaaaaaaaaaa',
35 my $fwd_primer = Bio::PrimarySeq->new(
36 -seq => 'CCNC',
38 my $rev_primer = Bio::PrimarySeq->new(
39 -seq => 'AAAAA',
42 my $search = Bio::Tools::AmpliconSearch->new(
43 -template => $template,
44 -fwd_primer => $fwd_primer,
45 -rev_primer => $rev_primer,
48 while (my $amplicon = $search->next_amplicon) {
49 print "Found amplicon at position ".$amplicon->start.'..'.$amplicon->end.":\n";
50 print $amplicon->seq->seq."\n\n";
53 # Now change the template (but you could change the primers instead) and look
54 # for amplicons again
56 $template = Bio::PrimarySeq->new(
57 -seq => 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
59 $search->template($template);
61 while (my $amplicon = $search->next_amplicon) {
62 print "Found amplicon at position ".$amplicon->start.'..'.$amplicon->end.":\n";
63 print $amplicon->seq->seq."\n\n";
66 =head1 DESCRIPTION
68 Perform an in silico PCR reaction, i.e. search for amplicons in a given template
69 sequence using the specified degenerate primer.
71 The template sequence is a sequence object, e.g. L<Bio::Seq>, and the primers
72 can be a sequence or a L<Bio::SeqFeature::Primer> object and contain ambiguous
73 residues as defined in the IUPAC conventions. The primer sequences are converted
74 into regular expressions using L<Bio::Tools::IUPAC> and the matching regions of
75 the template sequence, i.e. the amplicons, are returned as L<Bio::Seq::PrimedSeq>
76 objects.
78 AmpliconSearch will look for amplicons on both strands (forward and reverse-
79 complement) of the specified template sequence. If the reverse primer is not
80 provided, an amplicon will be returned and span a match of the forward primer to
81 the end of the template. Similarly, when no forward primer is given, match from
82 the beginning of the template sequence. When several amplicons overlap, only the
83 shortest one to more accurately represent the biases of PCR. Future improvements
84 may include modelling the effects of the number of PCR cycles or temperature on
85 the PCR products.
87 =head1 TODO
89 Future improvements may include:
91 =over
93 =item *
95 Allowing a small number of primer mismatches
97 =item *
99 Reporting all amplicons, including overlapping ones
101 =item *
103 Putting a limit on the length of amplicons, in accordance with the processivity
104 of the polymerase used
106 =back
108 =head1 FEEDBACK
110 =head2 Mailing Lists
112 User feedback is an integral part of the evolution of this and other
113 Bioperl modules. Send your comments and suggestions preferably to one
114 of the Bioperl mailing lists. Your participation is much appreciated.
116 bioperl-l@bioperl.org - General discussion
117 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
119 =head2 Support
121 Please direct usage questions or support issues to the mailing list:
123 I<bioperl-l@bioperl.org>
125 rather than to the module maintainer directly. Many experienced and
126 reponsive experts will be able look at the problem and quickly
127 address it. Please include a thorough description of the problem
128 with code and data examples if at all possible.
130 =head2 Reporting Bugs
132 Report bugs to the Bioperl bug tracking system to help us keep track
133 the bugs and their resolution. Bug reports can be submitted via the
134 web:
136 https://github.com/bioperl/bioperl-live/issues
138 =head1 AUTHOR
140 Florent Angly <florent.angly@gmail.com>
142 =head1 APPENDIX
144 The rest of the documentation details each of the object
145 methods. Internal methods are usually preceded with a _
147 =head2 new
149 Title : new
150 Usage : my $search = Bio::Tools::AmpliconSearch->new( );
151 Function : Initialize an amplicon search
152 Args : -template Sequence object for the template sequence. This object
153 will be converted to Bio::Seq if needed in since features
154 (amplicons and primers) will be added to this object.
155 -fwd_primer A sequence object representing the forward primer
156 -rev_primer A sequence object representing the reverse primer
157 -primer_file Read primers from a sequence file. It replaces
158 -fwd_primer and -rev_primer (optional)
159 -attach_primers Whether or not to attach primers to Amplicon objects. Default: 0 (off)
160 Returns : A Bio::Tools::AmpliconSearch object
162 =cut
164 sub new {
165 my ($class, @args) = @_;
166 my $self = $class->SUPER::new(@args);
167 my ($template, $primer_file, $fwd_primer, $rev_primer, $attach_primers) =
168 $self->_rearrange([qw(TEMPLATE PRIMER_FILE FWD_PRIMER REV_PRIMER ATTACH_PRIMERS)],
169 @args);
171 # Get primers
172 if (defined $primer_file) {
173 $self->primer_file($primer_file);
174 } else {
175 $self->fwd_primer($fwd_primer || '');
176 $self->rev_primer($rev_primer || '');
179 # Get template sequence
180 $self->template($template) if defined $template;
182 $self->attach_primers($attach_primers) if defined $attach_primers;
184 return $self;
188 =head2 template
190 Title : template
191 Usage : my $template = $search->template;
192 Function : Get/set the template sequence. Setting a new template resets any
193 search in progress.
194 Args : Optional Bio::Seq object
195 Returns : A Bio::Seq object
197 =cut
199 sub template {
200 my ($self, $template) = @_;
201 if (defined $template) {
202 if ( not(ref $template) || not $template->isa('Bio::PrimarySeqI') ) {
203 # Not a Bio::Seq or Bio::PrimarySeq
204 $self->throw("Expected a sequence object as input but got a '".ref($template)."'\n");
206 if (not $template->isa('Bio::SeqI')) {
207 # Convert sequence object to Bio::Seq Seq so that features can be added
208 my $primary_seq = $template;
209 $template = Bio::Seq->new();
210 $template->primary_seq($primary_seq);
212 $self->{template} = $template;
213 # Reset search in progress
214 $template_str = undef;
216 return $self->{template};
220 =head2 fwd_primer
222 Title : fwd_primer
223 Usage : my $primer = $search->fwd_primer;
224 Function : Get/set the forward primer. Setting a new forward primer resets any
225 search in progress.
226 Args : Optional sequence object or primer object or '' to match beginning
227 of sequence.
228 Returns : A sequence object or primer object or undef
230 =cut
232 sub fwd_primer {
233 my ($self, $primer) = @_;
234 if (defined $primer) {
235 $self->_set_primer('fwd', $primer);
237 return $self->{fwd_primer};
241 =head2 rev_primer
243 Title : rev_primer
244 Usage : my $primer = $search->rev_primer;
245 Function : Get/set the reverse primer. Setting a new reverse primer resets any
246 search in progress.
247 Args : Optional sequence object or primer object or '' to match end of
248 sequence.
249 Returns : A sequence object or primer object or undef
251 =cut
253 sub rev_primer {
254 my ($self, $primer) = @_;
255 if (defined $primer) {
256 $self->_set_primer('rev', $primer);
258 return $self->{rev_primer};
262 sub _set_primer {
263 # Save a primer (sequence object) and convert it to regexp. Type is 'fwd' for
264 # the forward primer or 'rev' for the reverse primer.
265 my ($self, $type, $primer) = @_;
266 my $re;
267 my $match_rna = 1;
268 if ($primer eq '') {
269 $re = $type eq 'fwd' ? '^' : '$';
270 } else {
271 if ( not(ref $primer) || (
272 not($primer->isa('Bio::PrimarySeqI')) &&
273 not($primer->isa('Bio::SeqFeature::Primer')) ) ) {
274 $self->throw('Expected a sequence or primer object as input but got a '.ref($primer)."\n");
276 $self->{$type.'_primer'} = $primer;
277 my $seq = $primer->isa('Bio::SeqFeature::Primer') ? $primer->seq : $primer;
278 $re = Bio::Tools::IUPAC->new(
279 -seq => $type eq 'fwd' ? $seq : $seq->revcom,
280 )->regexp($match_rna);
282 $self->{$type.'_regexp'} = $re;
283 # Reset search in progress
284 $template_str = undef;
285 $self->{regexp} = undef;
286 return $self->{$type.'_primer'};
290 =head2 primer_file
292 Title : primer_file
293 Usage : my ($fwd, $rev) = $search->primer_file;
294 Function : Get/set a sequence file to read the primer from. The first sequence
295 must be the forward primer, and the second is the optional reverse
296 primer. After reading the file, the primers are set using fwd_primer()
297 and rev_primer() and returned.
298 Args : Sequence file
299 Returns : Array containing forward and reverse primers as sequence objects.
301 =cut
303 sub primer_file {
304 my ($self, $primer_file) = @_;
305 # Read primer file and convert primers into regular expressions to catch
306 # amplicons present in the database
308 if (not defined $primer_file) {
309 $self->throw("Need to provide an input file\n");
312 # Mandatory first primer
313 require Bio::SeqIO;
314 my $in = Bio::SeqIO->new( -file => $primer_file );
315 my $fwd_primer = $in->next_seq;
316 if (not defined $fwd_primer) {
317 $self->throw("The file '$primer_file' contains no primers\n");
319 $fwd_primer->alphabet('dna'); # Force the alphabet since degenerate primers can look like protein sequences
321 # Optional reverse primers
322 my $rev_primer = $in->next_seq;
323 if (defined $rev_primer) {
324 $rev_primer->alphabet('dna');
325 } else {
326 $rev_primer = '';
329 $in->close;
331 $self->fwd_primer($fwd_primer);
332 $self->rev_primer($rev_primer);
334 return ($fwd_primer, $rev_primer);
338 =head2 attach_primers
340 Title : attach_primers
341 Usage : my $attached = $search->attach_primers;
342 Function : Get/set whether or not to attach primer objects to the amplicon
343 objects.
344 Args : Optional integer (1 for yes, 0 for no)
345 Returns : Integer (1 for yes, 0 for no)
347 =cut
349 sub attach_primers {
350 my ($self, $attach) = @_;
351 if (defined $attach) {
352 $self->{attach_primers} = $attach;
353 require Bio::SeqFeature::Primer;
355 return $self->{attach_primers} || 0;
359 =head2 next_amplicon
361 Title : next_amplicon
362 Usage : my $amplicon = $search->next_amplicon;
363 Function : Get the next amplicon
364 Args : None
365 Returns : A Bio::SeqFeature::Amplicon object
367 =cut
369 sub next_amplicon {
370 my ($self) = @_;
372 # Initialize search
373 if (not defined $template_str) {
374 $self->_init;
377 my $re = $self->_regexp;
379 my $amplicon;
380 if ($template_str =~ m/$re/g) {
381 my ($match, $rev_match) = ($1, $2);
382 my $strand = $rev_match ? -1 : 1;
383 $match = $match || $rev_match;
384 my $end = pos($template_str);
385 my $start = $end - length($match) + 1;
386 $amplicon = $self->_attach_amplicon($start, $end, $strand);
389 # If no more matches. Make sure calls to next_amplicon() will return undef.
390 if (not $amplicon) {
391 $template_str = '';
394 return $amplicon;
398 sub _init {
399 my ($self) = @_;
400 # Sanity checks
401 if ( not $self->template ) {
402 $self->throw('Need to provide a template sequence');
404 if ( not($self->fwd_primer) && not($self->rev_primer) ) {
405 $self->throw('Need to provide at least a primer');
407 # Set the template sequence string
408 $template_str = $self->template->seq;
409 # Set the regular expression to match amplicons
410 $self->_regexp;
412 return 1;
416 sub _regexp {
417 # Get the regexp to match amplicon. If the regexp is not set, initialize it.
418 my ($self, $regexp) = @_;
420 if ( not defined $self->{regexp} ) {
421 # Build regexp that matches amplicons on both strands and reports shortest
422 # amplicon when there are several overlapping amplicons
424 my $fwd_regexp = $self->_fwd_regexp;
425 my $rev_regexp = $self->_rev_regexp;
427 my ($fwd_regexp_rc, $basic_fwd_match, $rev_regexp_rc, $basic_rev_match);
428 if ($fwd_regexp eq '^') {
429 $fwd_regexp_rc = '';
430 $basic_fwd_match = "(?:.*?$rev_regexp)";
431 } else {
432 $fwd_regexp_rc = Bio::Tools::SeqPattern->new(
433 -seq => $fwd_regexp,
434 -type => 'dna',
435 )->revcom->str;
436 $basic_fwd_match = "(?:$fwd_regexp.*?$rev_regexp)";
439 if ($rev_regexp eq '$') {
440 $rev_regexp_rc = '';
441 $basic_rev_match = "(?:.*?$fwd_regexp_rc)";
442 } else {
443 $rev_regexp_rc = Bio::Tools::SeqPattern->new(
444 -seq => $rev_regexp,
445 -type => 'dna',
446 )->revcom->str;
447 $basic_rev_match = "(?:$rev_regexp_rc.*?$fwd_regexp_rc)";
450 my $fwd_exclude = "(?!$basic_rev_match".
451 ($fwd_regexp eq '^' ? '' : "|$fwd_regexp").
452 ")";
454 my $rev_exclude = "(?!$basic_fwd_match".
455 ($rev_regexp eq '$' ? '' : "|$rev_regexp_rc").
456 ')';
458 $self->{regexp} = qr/
459 ( $fwd_regexp (?:$fwd_exclude.)*? $rev_regexp ) |
460 ( $rev_regexp_rc (?:$rev_exclude.)*? $fwd_regexp_rc )
461 /xi;
464 return $self->{regexp};
468 =head2 annotate_template
470 Title : annotate_template
471 Usage : my $template = $search->annotate_template;
472 Function : Search for all amplicons and attach them to the template.
473 This is equivalent to running:
474 while (my $amplicon = $self->next_amplicon) {
475 # do something
477 my $annotated = $self->template;
478 Args : None
479 Returns : A Bio::Seq object with attached Bio::SeqFeature::Amplicons (and
480 Bio::SeqFeature::Primers if you set -attach_primers to 1).
482 =cut
484 sub annotate_template {
485 my ($self) = @_;
486 # Search all amplicons and attach them to template
487 1 while $self->next_amplicon;
488 # Return annotated template
489 return $self->template;
493 sub _fwd_regexp {
494 my ($self) = @_;
495 return $self->{fwd_regexp};
499 sub _rev_regexp {
500 my ($self) = @_;
501 return $self->{rev_regexp};
505 sub _attach_amplicon {
506 # Create an amplicon object and attach it to template
507 my ($self, $start, $end, $strand) = @_;
509 # Create Bio::SeqFeature::Amplicon feature and attach it to the template
510 my $amplicon = Bio::SeqFeature::Amplicon->new(
511 -start => $start,
512 -end => $end,
513 -strand => $strand,
514 -template => $self->template,
517 # Create Bio::SeqFeature::Primer feature and attach them to the amplicon
518 if ($self->attach_primers) {
519 for my $type ('fwd', 'rev') {
520 my ($pstart, $pend, $pstrand, $primer_seq);
522 # Coordinates relative to amplicon
523 if ($type eq 'fwd') {
524 # Forward primer
525 $primer_seq = $self->fwd_primer;
526 next if not defined $primer_seq;
527 $pstart = 1;
528 $pend = $primer_seq->length;
529 $pstrand = $amplicon->strand;
530 } else {
531 # Optional reverse primer
532 $primer_seq = $self->rev_primer;
533 next if not defined $primer_seq;
534 $pstart = $end - $primer_seq->length + 1;
535 $pend = $end;
536 $pstrand = -1 * $amplicon->strand;
539 # Absolute coordinates needed
540 $pstart += $start - 1;
541 $pend += $start - 1;
543 my $primer = Bio::SeqFeature::Primer->new(
544 -start => $pstart,
545 -end => $pend,
546 -strand => $pstrand,
547 -template => $amplicon,
550 # Attach primer to amplicon
551 if ($type eq 'fwd') {
552 $amplicon->fwd_primer($primer);
553 } else {
554 $amplicon->rev_primer($primer);
560 return $amplicon;