1 # BioPerl module for Bio::Tools::AmpliconSearch
3 # Copyright Florent Angly
5 # You may distribute this module under the same terms as perl itself
8 package Bio
::Tools
::AmpliconSearch
;
12 use Bio
::Tools
::IUPAC
;
13 use Bio
::SeqFeature
::Amplicon
;
14 use Bio
::Tools
::SeqPattern
;
15 # we require Bio::SeqIO
16 # and Bio::SeqFeature::Primer
18 use base
qw(Bio::Root::Root);
25 Bio::Tools::AmpliconSearch - Find amplicons in a template using degenerate PCR primers
30 use Bio::Tools::AmpliconSearch;
32 my $template = Bio::PrimarySeq->new(
33 -seq => 'aaaaaCCCCaaaaaaaaaaTTTTTTaaaaaCCACaaaaaTTTTTTaaaaaaaaaa',
35 my $fwd_primer = Bio::PrimarySeq->new(
38 my $rev_primer = Bio::PrimarySeq->new(
42 my $search = Bio::Tools::AmpliconSearch->new(
43 -template => $template,
44 -fwd_primer => $fwd_primer,
45 -rev_primer => $rev_primer,
48 while (my $amplicon = $search->next_amplicon) {
49 print "Found amplicon at position ".$amplicon->start.'..'.$amplicon->end.":\n";
50 print $amplicon->seq->seq."\n\n";
53 # Now change the template (but you could change the primers instead) and look
56 $template = Bio::PrimarySeq->new(
57 -seq => 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
59 $search->template($template);
61 while (my $amplicon = $search->next_amplicon) {
62 print "Found amplicon at position ".$amplicon->start.'..'.$amplicon->end.":\n";
63 print $amplicon->seq->seq."\n\n";
68 Perform an in silico PCR reaction, i.e. search for amplicons in a given template
69 sequence using the specified degenerate primer.
71 The template sequence is a sequence object, e.g. L<Bio::Seq>, and the primers
72 can be a sequence or a L<Bio::SeqFeature::Primer> object and contain ambiguous
73 residues as defined in the IUPAC conventions. The primer sequences are converted
74 into regular expressions using L<Bio::Tools::IUPAC> and the matching regions of
75 the template sequence, i.e. the amplicons, are returned as L<Bio::Seq::PrimedSeq>
78 AmpliconSearch will look for amplicons on both strands (forward and reverse-
79 complement) of the specified template sequence. If the reverse primer is not
80 provided, an amplicon will be returned and span a match of the forward primer to
81 the end of the template. Similarly, when no forward primer is given, match from
82 the beginning of the template sequence. When several amplicons overlap, only the
83 shortest one to more accurately represent the biases of PCR. Future improvements
84 may include modelling the effects of the number of PCR cycles or temperature on
89 Future improvements may include:
95 Allowing a small number of primer mismatches
99 Reporting all amplicons, including overlapping ones
103 Putting a limit on the length of amplicons, in accordance with the processivity
104 of the polymerase used
112 User feedback is an integral part of the evolution of this and other
113 Bioperl modules. Send your comments and suggestions preferably to one
114 of the Bioperl mailing lists. Your participation is much appreciated.
116 bioperl-l@bioperl.org - General discussion
117 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
121 Please direct usage questions or support issues to the mailing list:
123 I<bioperl-l@bioperl.org>
125 rather than to the module maintainer directly. Many experienced and
126 reponsive experts will be able look at the problem and quickly
127 address it. Please include a thorough description of the problem
128 with code and data examples if at all possible.
130 =head2 Reporting Bugs
132 Report bugs to the Bioperl bug tracking system to help us keep track
133 the bugs and their resolution. Bug reports can be submitted via the
136 https://github.com/bioperl/bioperl-live/issues
140 Florent Angly <florent.angly@gmail.com>
144 The rest of the documentation details each of the object
145 methods. Internal methods are usually preceded with a _
150 Usage : my $search = Bio::Tools::AmpliconSearch->new( );
151 Function : Initialize an amplicon search
152 Args : -template Sequence object for the template sequence. This object
153 will be converted to Bio::Seq if needed in since features
154 (amplicons and primers) will be added to this object.
155 -fwd_primer A sequence object representing the forward primer
156 -rev_primer A sequence object representing the reverse primer
157 -primer_file Read primers from a sequence file. It replaces
158 -fwd_primer and -rev_primer (optional)
159 -attach_primers Whether or not to attach primers to Amplicon objects. Default: 0 (off)
160 Returns : A Bio::Tools::AmpliconSearch object
165 my ($class, @args) = @_;
166 my $self = $class->SUPER::new
(@args);
167 my ($template, $primer_file, $fwd_primer, $rev_primer, $attach_primers) =
168 $self->_rearrange([qw(TEMPLATE PRIMER_FILE FWD_PRIMER REV_PRIMER ATTACH_PRIMERS)],
172 if (defined $primer_file) {
173 $self->primer_file($primer_file);
175 $self->fwd_primer($fwd_primer || '');
176 $self->rev_primer($rev_primer || '');
179 # Get template sequence
180 $self->template($template) if defined $template;
182 $self->attach_primers($attach_primers) if defined $attach_primers;
191 Usage : my $template = $search->template;
192 Function : Get/set the template sequence. Setting a new template resets any
194 Args : Optional Bio::Seq object
195 Returns : A Bio::Seq object
200 my ($self, $template) = @_;
201 if (defined $template) {
202 if ( not(ref $template) || not $template->isa('Bio::PrimarySeqI') ) {
203 # Not a Bio::Seq or Bio::PrimarySeq
204 $self->throw("Expected a sequence object as input but got a '".ref($template)."'\n");
206 if (not $template->isa('Bio::SeqI')) {
207 # Convert sequence object to Bio::Seq Seq so that features can be added
208 my $primary_seq = $template;
209 $template = Bio
::Seq
->new();
210 $template->primary_seq($primary_seq);
212 $self->{template
} = $template;
213 # Reset search in progress
214 $template_str = undef;
216 return $self->{template
};
223 Usage : my $primer = $search->fwd_primer;
224 Function : Get/set the forward primer. Setting a new forward primer resets any
226 Args : Optional sequence object or primer object or '' to match beginning
228 Returns : A sequence object or primer object or undef
233 my ($self, $primer) = @_;
234 if (defined $primer) {
235 $self->_set_primer('fwd', $primer);
237 return $self->{fwd_primer
};
244 Usage : my $primer = $search->rev_primer;
245 Function : Get/set the reverse primer. Setting a new reverse primer resets any
247 Args : Optional sequence object or primer object or '' to match end of
249 Returns : A sequence object or primer object or undef
254 my ($self, $primer) = @_;
255 if (defined $primer) {
256 $self->_set_primer('rev', $primer);
258 return $self->{rev_primer
};
263 # Save a primer (sequence object) and convert it to regexp. Type is 'fwd' for
264 # the forward primer or 'rev' for the reverse primer.
265 my ($self, $type, $primer) = @_;
269 $re = $type eq 'fwd' ?
'^' : '$';
271 if ( not(ref $primer) || (
272 not($primer->isa('Bio::PrimarySeqI')) &&
273 not($primer->isa('Bio::SeqFeature::Primer')) ) ) {
274 $self->throw('Expected a sequence or primer object as input but got a '.ref($primer)."\n");
276 $self->{$type.'_primer'} = $primer;
277 my $seq = $primer->isa('Bio::SeqFeature::Primer') ?
$primer->seq : $primer;
278 $re = Bio
::Tools
::IUPAC
->new(
279 -seq
=> $type eq 'fwd' ?
$seq : $seq->revcom,
280 )->regexp($match_rna);
282 $self->{$type.'_regexp'} = $re;
283 # Reset search in progress
284 $template_str = undef;
285 $self->{regexp
} = undef;
286 return $self->{$type.'_primer'};
293 Usage : my ($fwd, $rev) = $search->primer_file;
294 Function : Get/set a sequence file to read the primer from. The first sequence
295 must be the forward primer, and the second is the optional reverse
296 primer. After reading the file, the primers are set using fwd_primer()
297 and rev_primer() and returned.
299 Returns : Array containing forward and reverse primers as sequence objects.
304 my ($self, $primer_file) = @_;
305 # Read primer file and convert primers into regular expressions to catch
306 # amplicons present in the database
308 if (not defined $primer_file) {
309 $self->throw("Need to provide an input file\n");
312 # Mandatory first primer
314 my $in = Bio
::SeqIO
->new( -file
=> $primer_file );
315 my $fwd_primer = $in->next_seq;
316 if (not defined $fwd_primer) {
317 $self->throw("The file '$primer_file' contains no primers\n");
319 $fwd_primer->alphabet('dna'); # Force the alphabet since degenerate primers can look like protein sequences
321 # Optional reverse primers
322 my $rev_primer = $in->next_seq;
323 if (defined $rev_primer) {
324 $rev_primer->alphabet('dna');
331 $self->fwd_primer($fwd_primer);
332 $self->rev_primer($rev_primer);
334 return ($fwd_primer, $rev_primer);
338 =head2 attach_primers
340 Title : attach_primers
341 Usage : my $attached = $search->attach_primers;
342 Function : Get/set whether or not to attach primer objects to the amplicon
344 Args : Optional integer (1 for yes, 0 for no)
345 Returns : Integer (1 for yes, 0 for no)
350 my ($self, $attach) = @_;
351 if (defined $attach) {
352 $self->{attach_primers
} = $attach;
353 require Bio
::SeqFeature
::Primer
;
355 return $self->{attach_primers
} || 0;
361 Title : next_amplicon
362 Usage : my $amplicon = $search->next_amplicon;
363 Function : Get the next amplicon
365 Returns : A Bio::SeqFeature::Amplicon object
373 if (not defined $template_str) {
377 my $re = $self->_regexp;
380 if ($template_str =~ m/$re/g) {
381 my ($match, $rev_match) = ($1, $2);
382 my $strand = $rev_match ?
-1 : 1;
383 $match = $match || $rev_match;
384 my $end = pos($template_str);
385 my $start = $end - length($match) + 1;
386 $amplicon = $self->_attach_amplicon($start, $end, $strand);
389 # If no more matches. Make sure calls to next_amplicon() will return undef.
401 if ( not $self->template ) {
402 $self->throw('Need to provide a template sequence');
404 if ( not($self->fwd_primer) && not($self->rev_primer) ) {
405 $self->throw('Need to provide at least a primer');
407 # Set the template sequence string
408 $template_str = $self->template->seq;
409 # Set the regular expression to match amplicons
417 # Get the regexp to match amplicon. If the regexp is not set, initialize it.
418 my ($self, $regexp) = @_;
420 if ( not defined $self->{regexp
} ) {
421 # Build regexp that matches amplicons on both strands and reports shortest
422 # amplicon when there are several overlapping amplicons
424 my $fwd_regexp = $self->_fwd_regexp;
425 my $rev_regexp = $self->_rev_regexp;
427 my ($fwd_regexp_rc, $basic_fwd_match, $rev_regexp_rc, $basic_rev_match);
428 if ($fwd_regexp eq '^') {
430 $basic_fwd_match = "(?:.*?$rev_regexp)";
432 $fwd_regexp_rc = Bio
::Tools
::SeqPattern
->new(
436 $basic_fwd_match = "(?:$fwd_regexp.*?$rev_regexp)";
439 if ($rev_regexp eq '$') {
441 $basic_rev_match = "(?:.*?$fwd_regexp_rc)";
443 $rev_regexp_rc = Bio
::Tools
::SeqPattern
->new(
447 $basic_rev_match = "(?:$rev_regexp_rc.*?$fwd_regexp_rc)";
450 my $fwd_exclude = "(?!$basic_rev_match".
451 ($fwd_regexp eq '^' ?
'' : "|$fwd_regexp").
454 my $rev_exclude = "(?!$basic_fwd_match".
455 ($rev_regexp eq '$' ?
'' : "|$rev_regexp_rc").
458 $self->{regexp
} = qr
/
459 ( $fwd_regexp (?
:$fwd_exclude.)*?
$rev_regexp ) |
460 ( $rev_regexp_rc (?
:$rev_exclude.)*?
$fwd_regexp_rc )
464 return $self->{regexp
};
468 =head2 annotate_template
470 Title : annotate_template
471 Usage : my $template = $search->annotate_template;
472 Function : Search for all amplicons and attach them to the template.
473 This is equivalent to running:
474 while (my $amplicon = $self->next_amplicon) {
477 my $annotated = $self->template;
479 Returns : A Bio::Seq object with attached Bio::SeqFeature::Amplicons (and
480 Bio::SeqFeature::Primers if you set -attach_primers to 1).
484 sub annotate_template
{
486 # Search all amplicons and attach them to template
487 1 while $self->next_amplicon;
488 # Return annotated template
489 return $self->template;
495 return $self->{fwd_regexp
};
501 return $self->{rev_regexp
};
505 sub _attach_amplicon
{
506 # Create an amplicon object and attach it to template
507 my ($self, $start, $end, $strand) = @_;
509 # Create Bio::SeqFeature::Amplicon feature and attach it to the template
510 my $amplicon = Bio
::SeqFeature
::Amplicon
->new(
514 -template
=> $self->template,
517 # Create Bio::SeqFeature::Primer feature and attach them to the amplicon
518 if ($self->attach_primers) {
519 for my $type ('fwd', 'rev') {
520 my ($pstart, $pend, $pstrand, $primer_seq);
522 # Coordinates relative to amplicon
523 if ($type eq 'fwd') {
525 $primer_seq = $self->fwd_primer;
526 next if not defined $primer_seq;
528 $pend = $primer_seq->length;
529 $pstrand = $amplicon->strand;
531 # Optional reverse primer
532 $primer_seq = $self->rev_primer;
533 next if not defined $primer_seq;
534 $pstart = $end - $primer_seq->length + 1;
536 $pstrand = -1 * $amplicon->strand;
539 # Absolute coordinates needed
540 $pstart += $start - 1;
543 my $primer = Bio
::SeqFeature
::Primer
->new(
547 -template
=> $amplicon,
550 # Attach primer to amplicon
551 if ($type eq 'fwd') {
552 $amplicon->fwd_primer($primer);
554 $amplicon->rev_primer($primer);