2 # BioPerl module for Bio::AlignIO::meme
3 # Based on the Bio::SeqIO modules
4 # by Ewan Birney <birney@ebi.ac.uk>
5 # and Lincoln Stein <lstein@cshl.org>
6 # and the SimpleAlign.pm module of Ewan Birney
8 # Copyright Benjamin Berman
10 # You may distribute this module under the same terms as perl itself
14 Bio::AlignIO::meme - meme sequence input/output stream
18 Do not use this module directly. Use it via the Bio::AlignIO class.
21 # read in an alignment from meme
22 my $in = Bio::AlignIO->new(-format => 'meme',
24 while( my $aln = $in->next_aln ) {
25 # do something with the alignment
30 This object transforms the "sites sorted by position p-value" sections
31 of a meme (text) output file into a series of Bio::SimpleAlign
32 objects. Each SimpleAlign object contains Bio::LocatableSeq
33 objects which represent the individual aligned sites as defined by
34 the central portion of the "site" field in the meme file. The start
35 and end coordinates are derived from the "Start" field. See
36 L<Bio::SimpleAlign> and L<Bio::LocatableSeq> for more information.
38 This module can only parse MEME versions 3 and 4. Previous
39 versions have output formats that are more difficult to parse
40 correctly. If the meme output file is not version 3.0 or greater
47 Please direct usage questions or support issues to the mailing list:
49 I<bioperl-l@bioperl.org>
51 rather than to the module maintainer directly. Many experienced and
52 responsive experts will be able to look at the problem and quickly
53 address it. Please include a thorough description of the problem
54 with code and data examples if at all possible.
58 Report bugs to the Bioperl bug tracking system to help us keep track
59 of the bugs and their resolution. Bug reports can be submitted via the
62 https://github.com/bioperl/bioperl-live/issues
64 =head1 AUTHORS - Benjamin Berman
66 Based on the Bio::SeqIO modules by Ewan Birney and others
67 Email: benb@fruitfly.berkeley.edu
71 The rest of the documentation details each of the object
72 methods. Internal methods are usually preceded with an
77 # Let the code begin...
79 package Bio
::AlignIO
::meme
;
82 use Bio
::LocatableSeq
;
84 use base
qw(Bio::AlignIO);
88 "MEME output file must be generated by version 3.0 or higher";
89 my $MEME_NO_HEADER_ERR =
90 "MEME output file contains no header line (ex: MEME version 3.0)";
91 my $HTML_VERS_ERR = "MEME output file must be generated with the -text option";
96 Usage : $aln = $stream->next_aln()
97 Function: returns the next alignment in the stream
98 Returns : Bio::SimpleAlign object with the score() set to the evalue of the
106 my $aln = Bio
::SimpleAlign
->new( -source
=> 'meme' );
108 my $good_align_sec = 0;
109 my $in_align_sec = 0;
111 while ( !$good_align_sec && defined( $line = $self->_readline() ) ) {
112 if ( !$in_align_sec ) {
114 # Check for the meme header
115 if ( $line =~ /^\s*MEME\s+version\s+(\S+)/ ) {
116 $self->{'meme_vers'} = $1;
117 my ($vers) = $self->{'meme_vers'} =~ /^(\d)/;
118 $self->throw($MEME_VERS_ERR) unless ( $vers >= 3 );
119 $self->{'seen_header'} = 1;
122 # Check if they've output the HTML version
123 if ( $line =~ /\<TITLE\>/i ) {
124 $self->throw($HTML_VERS_ERR);
128 if ( $line =~ /MOTIF\s+\d+\s+width.+E-value = (\S+)/ ) {
129 $self->throw($MEME_NO_HEADER_ERR)
130 unless ( $self->{'seen_header'} );
134 # Check if we're going into an alignment section
135 if ( $line =~ /sites sorted by position/ ) {
136 $self->throw($MEME_NO_HEADER_ERR)
137 unless ( $self->{'seen_header'} );
141 # The first regexp is for version 3, the second is for version 4
142 elsif ( $line =~ /^(\S
+)\s
+([+-]?
)\s
+(\d
+)\s
+
143 \S
+\s
+[.A
-Z\
-]*\s
+([A
-Z\
-]+)\s
+
146 $line =~ /^(\S
+)\s
+([+-]?
)\s
+(\d
+)\s
+
147 \S
+\s
+\
.\s
+([A
-Z\
-]+)/xi
150 # Got a sequence line
152 my $strand = ( $2 eq '-' ) ?
-1 : 1;
154 my $central = uc($4);
157 # my $left_flank = uc($5);
158 # my $right_flank = uc($7);
160 # Info about the flanking sequence
161 # my $start_len = ($strand > 0) ? length($left_flank) :
162 # length($right_flank);
163 # my $end_len = ($strand > 0) ? length($right_flank) :
164 # length($left_flank);
166 # Make the sequence. Meme gives the start coordinate at the left
167 # hand side of the motif relative to the INPUT sequence.
168 my $end_pos = $start_pos + length($central) - 1;
169 my $seq = Bio
::LocatableSeq
->new(
171 -display_id
=> $seq_name,
172 -start
=> $start_pos,
175 -alphabet
=> $self->alphabet,
178 # Add the sequence motif to the alignment
181 elsif ( ( $line =~ /^\-/ ) || ( $line =~ /Sequence name/ ) ) {
183 # These are acceptable things to be in the site section
185 elsif ( $line =~ /^\s*$/ ) {
187 # This ends the site section
192 $self->warn("Unrecognized format:\n$line");
197 # Signal an error if we didn't find a header section
198 $self->throw($MEME_NO_HEADER_ERR) unless ( $self->{'seen_header'} );
200 if ($good_align_sec) {
201 $aln->score($evalue);
211 Usage : $stream->write_aln(@aln)
212 Function: Not implemented
213 Returns : Nothing; the method calls throw_not_implemented()
214 Args : Bio::SimpleAlign object
219 my ( $self, @aln ) = @_;
220 $self->throw_not_implemented();
223 # ----------------------------------------
225 # ----------------------------------------
228 my ( $self, @args ) = @_;
230 # Call into our base version
231 $self->SUPER::_initialize
(@args);
233 # Then initialize our data variables
234 $self->{'seen_header'} = 0;