t/AlignIO/AlignIO.t: fix number of tests in plan (fixup c523e6bed866)
[bioperl-live.git] / Bio / AlignIO / maf.pm
blob5f42a2d0c04f4a665bfe3ea0def4b6fff8c55d02
2 # BioPerl module for Bio::AlignIO::maf
4 # Copyright Allen Day
7 =head1 NAME
9 Bio::AlignIO::maf - Multiple Alignment Format sequence input stream
11 =head1 SYNOPSIS
13 Do not use this module directly. Use it via the Bio::AlignIO class.
15 use Bio::AlignIO;
17 my $alignio = Bio::AlignIO->new(-fh => \*STDIN, -format => 'maf');
19 while(my $aln = $alignio->next_aln()){
20 my $match_line = $aln->match_line;
22 print $aln, "\n";
24 print $aln->length, "\n";
25 print $aln->num_residues, "\n";
26 print $aln->is_flush, "\n";
27 print $aln->num_sequences, "\n";
29 $aln->splice_by_seq_pos(1);
31 print $aln->consensus_string(60), "\n";
32 print $aln->get_seq_by_pos(1)->seq, "\n";
33 print $aln->match_line(), "\n";
35 print "\n";
38 =head1 DESCRIPTION
40 This class constructs Bio::SimpleAlign objects from an MAF-format
41 multiple alignment file.
43 Writing in MAF format is currently unimplemented.
45 Spec of MAF format is here:
46 http://genome.ucsc.edu/FAQ/FAQformat
48 =head1 FEEDBACK
50 =head2 Support
52 Please direct usage questions or support issues to the mailing list:
54 I<bioperl-l@bioperl.org>
56 rather than to the module maintainer directly. Many experienced and
57 reponsive experts will be able look at the problem and quickly
58 address it. Please include a thorough description of the problem
59 with code and data examples if at all possible.
61 =head2 Reporting Bugs
63 Report bugs to the Bioperl bug tracking system to help us keep track
64 the bugs and their resolution. Bug reports can be submitted via the
65 web:
67 https://github.com/bioperl/bioperl-live/issues
69 =head1 AUTHORS - Allen Day
71 Email: allenday@ucla.edu
73 =head1 APPENDIX
75 The rest of the documentation details each of the object
76 methods. Internal methods are usually preceded with a _
78 =cut
80 # Let the code begin...
82 package Bio::AlignIO::maf;
83 use strict;
85 use Bio::SimpleAlign;
87 use base qw(Bio::AlignIO);
89 =head2 new
91 Title : new
92 Usage : my $alignio = Bio::AlignIO->new(-format => 'maf'
93 -file => '>file',
94 -idlength => 10,
95 -idlinebreak => 1);
96 Function: Initialize a new L<Bio::AlignIO::maf> reader
97 Returns : L<Bio::AlignIO> object
98 Args :
100 =cut
102 sub _initialize {
103 my($self,@args) = @_;
104 $self->SUPER::_initialize(@args);
109 =head2 next_aln
111 Title : next_aln
112 Usage : $aln = $stream->next_aln()
113 Function: returns the next alignment in the stream.
114 Throws an exception if trying to read in PHYLIP
115 sequential format.
116 Returns : L<Bio::SimpleAlign> object
117 Args :
119 =cut
121 sub next_aln {
122 my $self = shift;
124 # check beginning of file for proper header
125 if(!$self->{seen_header}){
126 my $line = $self->_readline;
127 $self->throw("This doesn't look like a MAF file. First line should start with ##maf, but it was: ".$line)
128 unless $line =~ /^##maf/;
129 $self->{seen_header} = 1;
130 # keep in case we parse this later
131 $self->_pushback($line);
134 my $aln = Bio::SimpleAlign->new(-source => 'maf');
136 my($aline, @slines, $seen_aline);
137 while(my $line = $self->_readline()){
138 if ($line =~ /^a\s/xms) {
139 # next block?
140 if ($seen_aline) {
141 $self->_pushback($line);
142 last;
144 $aline = $line;
145 $seen_aline++;
146 } elsif ($line =~ /^s\s/xms) {
147 push @slines, $line;
148 } else {
149 # missed lines
150 $self->debug($line);
154 # all MAF starts with 'a' line
155 return unless $aline;
157 my($kvs) = $aline =~ /^a\s+(.+)$/;
158 my @kvs = split /\s+/, $kvs if $kvs;
159 my %kv;
160 foreach my $kv (@kvs){
161 my($k,$v) = $kv =~ /(.+)=(.+)/;
162 $kv{$k} = $v;
165 $aln->score($kv{score});
167 foreach my $sline (@slines){
168 my($s,$src,$start,$size,$strand,$srcsize,$text) =
169 split /\s+/, $sline;
170 # adjust coordinates to be one-based inclusive
171 $start = $start + 1;
172 $strand = $strand eq '+' ? 1 : $strand eq '-' ? -1 : 0;
173 my $seq = Bio::LocatableSeq->new('-seq' => $text,
174 '-display_id' => $src,
175 '-start' => $strand > 0 ? $start : ($srcsize-($start+$size-2)),
176 '-end' => $strand > 0 ? ($start + $size - 1) : ($srcsize-($start-1)) ,
177 '-strand' => $strand,
178 '-alphabet' => $self->alphabet,
180 $aln->add_seq($seq);
183 return $aln if $aln->num_sequences;
184 return;
187 sub write_aln {
188 shift->throw_not_implemented