Bio::Tools::CodonTable::is_start_codon: check in case of ambiguous codons (#266)
[bioperl-live.git] / lib / Bio / AlignIO / maf.pm
blobfdcd9fa1512787ea503856666d64ad1899894ba5
2 # BioPerl module for Bio::AlignIO::maf
4 # Copyright Allen Day
7 =head1 NAME
9 Bio::AlignIO::maf - Multiple Alignment Format sequence input stream
11 =head1 SYNOPSIS
13 Do not use this module directly. Use it via the Bio::AlignIO class.
15 use Bio::AlignIO;
17 my $alignio = Bio::AlignIO->new(-fh => \*STDIN, -format => 'maf');
19 while(my $aln = $alignio->next_aln()){
20 my $match_line = $aln->match_line;
22 print $aln, "\n";
24 print $aln->length, "\n";
25 print $aln->num_residues, "\n";
26 print $aln->is_flush, "\n";
27 print $aln->num_sequences, "\n";
29 $aln->splice_by_seq_pos(1);
31 print $aln->consensus_string(60), "\n";
32 print $aln->get_seq_by_pos(1)->seq, "\n";
33 print $aln->match_line(), "\n";
35 print "\n";
38 =head1 DESCRIPTION
40 This class constructs Bio::SimpleAlign objects from an MAF-format
41 multiple alignment file.
43 Writing in MAF format is currently unimplemented.
45 Spec of MAF format is here:
46 http://genome.ucsc.edu/FAQ/FAQformat
48 =head1 FEEDBACK
50 =head2 Support
52 Please direct usage questions or support issues to the mailing list:
54 I<bioperl-l@bioperl.org>
56 rather than to the module maintainer directly. Many experienced and
57 responsive experts will be able to look at the problem and quickly
58 address it. Please include a thorough description of the problem
59 with code and data examples if at all possible.
61 =head2 Reporting Bugs
63 Report bugs to the Bioperl bug tracking system to help us keep track
64 of the bugs and their resolution. Bug reports can be submitted via the
65 web:
67 https://github.com/bioperl/bioperl-live/issues
69 =head1 AUTHORS - Allen Day
71 Email: allenday@ucla.edu
73 =head1 APPENDIX
75 The rest of the documentation details each of the object
76 methods. Internal methods are usually preceded with a _
78 =cut
80 # Let the code begin...
82 package Bio::AlignIO::maf;
84 use strict;
86 use Bio::SimpleAlign;
88 use base qw(Bio::AlignIO);
90 =head2 new
92 Title : new
93 Usage : my $alignio = Bio::AlignIO->new(-format => 'maf'
94 -file => '>file',
95 -idlength => 10,
96 -idlinebreak => 1);
97 Function: Initialize a new L<Bio::AlignIO::maf> reader
98 Returns : L<Bio::AlignIO> object
99 Args :
101 =cut
# Initialization hook: this format needs no setup of its own, so all
# constructor arguments are handed unchanged to the parent class.
sub _initialize {
    my $self = shift;
    $self->SUPER::_initialize(@_);
}
110 =head2 next_aln
112 Title : next_aln
113 Usage : $aln = $stream->next_aln()
114 Function: returns the next alignment in the stream.
115 Throws an exception if the first line of the stream
116 does not start with ##maf.
117 Returns : L<Bio::SimpleAlign> object
118 Args :
120 =cut
# Read the next alignment block (one 'a' line followed by its 's' lines)
# from the stream and return it as a Bio::SimpleAlign object.
#
# Returns : the alignment, or nothing (empty list / undef) when no further
#           'a' line is found or the block contains no sequences.
# Throws  : if the first line of the stream does not start with "##maf".
sub next_aln {
    my $self = shift;

    # The header is checked only once per stream.
    if ( !$self->{seen_header} ) {
        my $line = $self->_readline;

        # An empty stream yields undef here; still report it as a bad file,
        # but without operating on an undefined value.
        $self->throw(
            "This doesn't look like a MAF file. First line should start with ##maf, but it was: "
              . ( defined $line ? $line : '' ) )
          unless defined $line && $line =~ /^##maf/;

        $self->{seen_header} = 1;

        # keep in case we parse this later
        $self->_pushback($line);
    }

    my $aln = Bio::SimpleAlign->new( -source => 'maf' );

    # Collect the 'a' line of this block and every 's' line that follows it.
    my ( $aline, @slines );
    while ( defined( my $line = $self->_readline() ) ) {
        if ( $line =~ /^a\s/xms ) {

            # A second 'a' line starts the next block: push it back for
            # the following call and stop reading.
            if ( defined $aline ) {
                $self->_pushback($line);
                last;
            }
            $aline = $line;
        }
        elsif ( $line =~ /^s\s/xms ) {
            push @slines, $line;
        }
        else {
            # Any other line type is only reported through debug().
            $self->debug($line);
        }
    }

    # all MAF starts with 'a' line
    return unless $aline;

    # Parse the whitespace-separated key=value pairs of the 'a' line.
    # (Built without "my ... if COND", whose behaviour is undefined.)
    my ($kv_text) = $aline =~ /^a\s+(.+)$/;
    my %kv;
    if ( defined $kv_text ) {
        foreach my $pair ( split /\s+/, $kv_text ) {
            my ( $key, $value ) = $pair =~ /(.+)=(.+)/;
            next unless defined $key;    # token is not of the form key=value
            $kv{$key} = $value;
        }
    }

    $aln->score( $kv{score} );

    foreach my $sline (@slines) {

        # Fields of an 's' line; the first one is the literal "s" tag.
        my ( undef, $src, $start, $size, $strand, $srcsize, $text ) =
          split /\s+/, $sline;

        # adjust coordinates to be one-based inclusive
        $start = $start + 1;
        $strand = $strand eq '+' ? 1 : $strand eq '-' ? -1 : 0;

        # For anything but the plus strand the interval is mirrored
        # against the source size.
        my ( $seq_start, $seq_end );
        if ( $strand > 0 ) {
            $seq_start = $start;
            $seq_end   = $start + $size - 1;
        }
        else {
            $seq_start = $srcsize - ( $start + $size - 2 );
            $seq_end   = $srcsize - ( $start - 1 );
        }

        my $seq = Bio::LocatableSeq->new(
            '-seq'        => $text,
            '-display_id' => $src,
            '-start'      => $seq_start,
            '-end'        => $seq_end,
            '-strand'     => $strand,
            '-alphabet'   => $self->alphabet,
        );
        $aln->add_seq($seq);
    }

    return $aln if $aln->num_sequences;
    return;
}
188 sub write_aln {
189 shift->throw_not_implemented