maint: remove no longer used test data files.
[bioperl-live.git] / lib / Bio / AlignIO / meme.pm
blob8f670efa7e5c0c041f0f36431ec742b5308cdd79
2 # BioPerl module for Bio::AlignIO::meme
3 # Based on the Bio::SeqIO modules
4 # by Ewan Birney <birney@ebi.ac.uk>
5 # and Lincoln Stein <lstein@cshl.org>
6 # and the SimpleAlign.pm module of Ewan Birney
8 # Copyright Benjamin Berman
10 # You may distribute this module under the same terms as perl itself
12 =head1 NAME
14 Bio::AlignIO::meme - meme sequence input/output stream
16 =head1 SYNOPSIS
18 Do not use this module directly. Use it via the Bio::AlignIO class.
20 use Bio::AlignIO;
21 # read in an alignment from meme
22 my $in = Bio::AlignIO->new(-format => 'meme',
23 -file => 'meme.out');
24 while( my $aln = $in->next_aln ) {
25 # do something with the alignment
28 =head1 DESCRIPTION
30 This object transforms the "sites sorted by position p-value" sections
31 of a meme (text) output file into a series of Bio::SimpleAlign
32 objects. Each SimpleAlign object contains Bio::LocatableSeq
33 objects which represent the individual aligned sites as defined by
34 the central portion of the "site" field in the meme file. The start
35 and end coordinates are derived from the "Start" field. See
36 L<Bio::SimpleAlign> and L<Bio::LocatableSeq> for more information.
38 This module can only parse MEME version 3 and 4. Previous
39 versions have output formats that are more difficult to parse
40 correctly. If the meme output file is not version 3.0 or greater
41 we signal an error.
43 =head1 FEEDBACK
45 =head2 Support
47 Please direct usage questions or support issues to the mailing list:
49 I<bioperl-l@bioperl.org>
51 rather than to the module maintainer directly. Many experienced and
52 reponsive experts will be able look at the problem and quickly
53 address it. Please include a thorough description of the problem
54 with code and data examples if at all possible.
56 =head2 Reporting Bugs
58 Report bugs to the Bioperl bug tracking system to help us keep track
59 the bugs and their resolution. Bug reports can be submitted via the
60 web:
62 https://github.com/bioperl/bioperl-live/issues
64 =head1 AUTHORS - Benjamin Berman
66 Bbased on the Bio::SeqIO modules by Ewan Birney and others
67 Email: benb@fruitfly.berkeley.edu
69 =head1 APPENDIX
71 The rest of the documentation details each of the object
72 methods. Internal methods are usually preceded with an
73 underscore.
75 =cut
77 # Let the code begin...
79 package Bio::AlignIO::meme;
80 use strict;
81 use Bio::LocatableSeq;
83 use base qw(Bio::AlignIO);
85 # Constants
86 my $MEME_VERS_ERR =
87 "MEME output file must be generated by version 3.0 or higher";
88 my $MEME_NO_HEADER_ERR =
89 "MEME output file contains no header line (ex: MEME version 3.0)";
90 my $HTML_VERS_ERR = "MEME output file must be generated with the -text option";
92 =head2 next_aln
94 Title : next_aln
95 Usage : $aln = $stream->next_aln()
96 Function: returns the next alignment in the stream
97 Returns : Bio::SimpleAlign object with the score() set to the evalue of the
98 motif.
99 Args : NONE
101 =cut
103 sub next_aln {
104 my ($self) = @_;
105 my $aln = Bio::SimpleAlign->new( -source => 'meme' );
106 my $line;
107 my $good_align_sec = 0;
108 my $in_align_sec = 0;
109 my $evalue;
110 while ( !$good_align_sec && defined( $line = $self->_readline() ) ) {
111 if ( !$in_align_sec ) {
113 # Check for the meme header
114 if ( $line =~ /^\s*MEME\s+version\s+(\S+)/ ) {
115 $self->{'meme_vers'} = $1;
116 my ($vers) = $self->{'meme_vers'} =~ /^(\d)/;
117 $self->throw($MEME_VERS_ERR) unless ( $vers >= 3 );
118 $self->{'seen_header'} = 1;
121 # Check if they've output the HTML version
122 if ( $line =~ /\<TITLE\>/i ) {
123 $self->throw($HTML_VERS_ERR);
126 # Grab the evalue
127 if ( $line =~ /MOTIF\s+\d+\s+width.+E-value = (\S+)/ ) {
128 $self->throw($MEME_NO_HEADER_ERR)
129 unless ( $self->{'seen_header'} );
130 $evalue = $1;
133 # Check if we're going into an alignment section
134 if ( $line =~ /sites sorted by position/ ) {
135 $self->throw($MEME_NO_HEADER_ERR)
136 unless ( $self->{'seen_header'} );
137 $in_align_sec = 1;
140 # The first regexp is for version 3, the second is for version 4
141 elsif ( $line =~ /^(\S+)\s+([+-]?)\s+(\d+)\s+
142 \S+\s+[.A-Z\-]*\s+([A-Z\-]+)\s+
143 ([.A-Z\-]*)/xi
145 $line =~ /^(\S+)\s+([+-]?)\s+(\d+)\s+
146 \S+\s+\.\s+([A-Z\-]+)/xi
149 # Got a sequence line
150 my $seq_name = $1;
151 my $strand = ( $2 eq '-' ) ? -1 : 1;
152 my $start_pos = $3;
153 my $central = uc($4);
155 # my $p_val = $4;
156 # my $left_flank = uc($5);
157 # my $right_flank = uc($7);
159 # Info about the flanking sequence
160 # my $start_len = ($strand > 0) ? length($left_flank) :
161 # length($right_flank);
162 # my $end_len = ($strand > 0) ? length($right_flank) :
163 # length($left_flank);
165 # Make the sequence. Meme gives the start coordinate at the left
166 # hand side of the motif relative to the INPUT sequence.
167 my $end_pos = $start_pos + length($central) - 1;
168 my $seq = Bio::LocatableSeq->new(
169 -seq => $central,
170 -display_id => $seq_name,
171 -start => $start_pos,
172 -end => $end_pos,
173 -strand => $strand,
174 -alphabet => $self->alphabet,
177 # Add the sequence motif to the alignment
178 $aln->add_seq($seq);
180 elsif ( ( $line =~ /^\-/ ) || ( $line =~ /Sequence name/ ) ) {
182 # These are acceptable things to be in the site section
184 elsif ( $line =~ /^\s*$/ ) {
186 # This ends the site section
187 $in_align_sec = 0;
188 $good_align_sec = 1;
190 else {
191 $self->warn("Unrecognized format:\n$line");
192 return;
196 # Signal an error if we didn't find a header section
197 $self->throw($MEME_NO_HEADER_ERR) unless ( $self->{'seen_header'} );
199 if ($good_align_sec) {
200 $aln->score($evalue);
201 return $aln;
204 return;
207 =head2 write_aln
209 Title : write_aln
210 Usage : $stream->write_aln(@aln)
211 Function: Not implemented
212 Returns : 1 for success and 0 for error
213 Args : Bio::SimpleAlign object
215 =cut
217 sub write_aln {
218 my ( $self, @aln ) = @_;
219 $self->throw_not_implemented();
222 # ----------------------------------------
223 # - Private methods
224 # ----------------------------------------
226 sub _initialize {
227 my ( $self, @args ) = @_;
229 # Call into our base version
230 $self->SUPER::_initialize(@args);
232 # Then initialize our data variables
233 $self->{'seen_header'} = 0;