2 # BioPerl module for Bio::AlignIO::meme
3 # Based on the Bio::SeqIO modules
4 # by Ewan Birney <birney@ebi.ac.uk>
5 # and Lincoln Stein <lstein@cshl.org>
6 # and the SimpleAlign.pm module of Ewan Birney
8 # Copyright Benjamin Berman
10 # You may distribute this module under the same terms as perl itself
14 Bio::AlignIO::meme - meme sequence input/output stream
18 Do not use this module directly. Use it via the Bio::AlignIO class.
21 # read in an alignment from meme
22 my $in = Bio::AlignIO->new(-format => 'meme',
24 while( my $aln = $in->next_aln ) {
25 # do something with the alignment
30 This object transforms the "sites sorted by position p-value" sections
31 of a meme (text) output file into a series of Bio::SimpleAlign
32 objects. Each SimpleAlign object contains Bio::LocatableSeq
33 objects which represent the individual aligned sites as defined by
34 the central portion of the "site" field in the meme file. The start
35 and end coordinates are derived from the "Start" field. See
36 L<Bio::SimpleAlign> and L<Bio::LocatableSeq> for more information.
38 This module can only parse MEME version 3 and 4. Previous
39 versions have output formats that are more difficult to parse
40 correctly. If the meme output file is not version 3.0 or greater
47 Please direct usage questions or support issues to the mailing list:
49 I<bioperl-l@bioperl.org>
51 rather than to the module maintainer directly. Many experienced and
52 reponsive experts will be able look at the problem and quickly
53 address it. Please include a thorough description of the problem
54 with code and data examples if at all possible.
58 Report bugs to the Bioperl bug tracking system to help us keep track
59 the bugs and their resolution. Bug reports can be submitted via the
62 https://github.com/bioperl/bioperl-live/issues
64 =head1 AUTHORS - Benjamin Berman
66 Bbased on the Bio::SeqIO modules by Ewan Birney and others
67 Email: benb@fruitfly.berkeley.edu
71 The rest of the documentation details each of the object
72 methods. Internal methods are usually preceded with an
77 # Let the code begin...
79 package Bio
::AlignIO
::meme
;
81 use Bio
::LocatableSeq
;
83 use base
qw(Bio::AlignIO);
87 "MEME output file must be generated by version 3.0 or higher";
88 my $MEME_NO_HEADER_ERR =
89 "MEME output file contains no header line (ex: MEME version 3.0)";
90 my $HTML_VERS_ERR = "MEME output file must be generated with the -text option";
95 Usage : $aln = $stream->next_aln()
96 Function: returns the next alignment in the stream
97 Returns : Bio::SimpleAlign object with the score() set to the evalue of the
105 my $aln = Bio
::SimpleAlign
->new( -source
=> 'meme' );
107 my $good_align_sec = 0;
108 my $in_align_sec = 0;
110 while ( !$good_align_sec && defined( $line = $self->_readline() ) ) {
111 if ( !$in_align_sec ) {
113 # Check for the meme header
114 if ( $line =~ /^\s*MEME\s+version\s+(\S+)/ ) {
115 $self->{'meme_vers'} = $1;
116 my ($vers) = $self->{'meme_vers'} =~ /^(\d)/;
117 $self->throw($MEME_VERS_ERR) unless ( $vers >= 3 );
118 $self->{'seen_header'} = 1;
121 # Check if they've output the HTML version
122 if ( $line =~ /\<TITLE\>/i ) {
123 $self->throw($HTML_VERS_ERR);
127 if ( $line =~ /MOTIF\s+\d+\s+width.+E-value = (\S+)/ ) {
128 $self->throw($MEME_NO_HEADER_ERR)
129 unless ( $self->{'seen_header'} );
133 # Check if we're going into an alignment section
134 if ( $line =~ /sites sorted by position/ ) {
135 $self->throw($MEME_NO_HEADER_ERR)
136 unless ( $self->{'seen_header'} );
140 # The first regexp is for version 3, the second is for version 4
141 elsif ( $line =~ /^(\S
+)\s
+([+-]?
)\s
+(\d
+)\s
+
142 \S
+\s
+[.A
-Z\
-]*\s
+([A
-Z\
-]+)\s
+
145 $line =~ /^(\S
+)\s
+([+-]?
)\s
+(\d
+)\s
+
146 \S
+\s
+\
.\s
+([A
-Z\
-]+)/xi
149 # Got a sequence line
151 my $strand = ( $2 eq '-' ) ?
-1 : 1;
153 my $central = uc($4);
156 # my $left_flank = uc($5);
157 # my $right_flank = uc($7);
159 # Info about the flanking sequence
160 # my $start_len = ($strand > 0) ? length($left_flank) :
161 # length($right_flank);
162 # my $end_len = ($strand > 0) ? length($right_flank) :
163 # length($left_flank);
165 # Make the sequence. Meme gives the start coordinate at the left
166 # hand side of the motif relative to the INPUT sequence.
167 my $end_pos = $start_pos + length($central) - 1;
168 my $seq = Bio
::LocatableSeq
->new(
170 -display_id
=> $seq_name,
171 -start
=> $start_pos,
174 -alphabet
=> $self->alphabet,
177 # Add the sequence motif to the alignment
180 elsif ( ( $line =~ /^\-/ ) || ( $line =~ /Sequence name/ ) ) {
182 # These are acceptable things to be in the site section
184 elsif ( $line =~ /^\s*$/ ) {
186 # This ends the site section
191 $self->warn("Unrecognized format:\n$line");
196 # Signal an error if we didn't find a header section
197 $self->throw($MEME_NO_HEADER_ERR) unless ( $self->{'seen_header'} );
199 if ($good_align_sec) {
200 $aln->score($evalue);
210 Usage : $stream->write_aln(@aln)
211 Function: Not implemented
212 Returns : 1 for success and 0 for error
213 Args : Bio::SimpleAlign object
218 my ( $self, @aln ) = @_;
219 $self->throw_not_implemented();
222 # ----------------------------------------
224 # ----------------------------------------
227 my ( $self, @args ) = @_;
229 # Call into our base version
230 $self->SUPER::_initialize
(@args);
232 # Then initialize our data variables
233 $self->{'seen_header'} = 0;