4 CXGN::Phylo::File - a class to read different tree files.
8 my $file = CXGN::Phylo::File->new($filename);
9 my @node_names = $file->get_node_names();
10 my $tree_string = $file->get_tree_string();
15 Lukas Mueller (lam87@cornell.edu)
19 package CXGN
::Phylo
::File
;
# Constructor body (excerpt): bless the argument hash into the class, detect
# the file type, then pull a newick expression out of the file and store it
# with set_tree_string().
26 my $self = bless $args, $class;
# Remember what determine_filetype() reports for $file.
32 $self->set_file_type($self->determine_filetype($file));
34 if ($self->get_file_type() eq "nexus") {
35 print STDERR
"READING a NEXUS FILE!\n\n";
# NOTE(review): two-argument open on the bareword handle F. A three-argument
# open with a lexical handle would keep characters in $file from being read
# as an open mode, and adding $! to the message would report why it failed.
36 open (F
, "<$file") || die "Can't open file \"$file\".\n";
# Line-by-line scan: the bare patterns below match against $_ (the loop
# header is not part of this excerpt).
39 if (/^>/) { next; } # skip lines that start with >
40 if (/^\#/) { next; } # and #
41 if (/^\[|^\]/) { next; } # and [ or ]
42 if (/Translate/i) { $in_translation = 1;} # start collecting the node names
43 if (/^\s+\;/ && $in_translation) { $in_translation = 0; } # until that section is over
# Inside the Translate section each "<number> <name>" line fills %ids with
# number => name.
44 if (($in_translation) && /^\s*(\d+)\s+(.[A-Za-z._\-0-9]+),?/) { $ids{$1}=$2; } # the leaf node names are coded with a number
45 if (/^\s*tree/i) { $in_tree = 1; } # finally, the tree!
46 if (/^\s*End;/i) { $in_tree = 0; }
47 if ($in_tree) { $newick .= $_ } # allow newick to span multiple lines (forgiving)
51 # throw away trash chars before the newick expression begins
53 # translate the newick: swap each numeric id for its name
55 foreach my $k (keys %ids){
# NOTE(review): without /g only the first match of each id is replaced, and
# \b$k\b can also match digits inside branch lengths (e.g. the 1 in "0.1").
# $v is presumably the name stored in %ids for $k -- its assignment is not
# visible in this excerpt; confirm.
57 $newick =~ s/\b$k\b/$v/;
# Join the expression onto one line, then drop a leading "tree ..." header up
# to the first "(".
# NOTE(review): the header match is case-sensitive and anchored at the very
# start of the string, whereas the line test earlier used /^\s*tree/i --
# confirm that upper-case or indented "tree" headers are stripped elsewhere.
60 $newick =~ s/\n|\r//g;
61 $newick =~ s/^(tree.*?)(\(.*)$/$2/;
# Hand a reference to the id => name map to set_node_names().
63 $self->set_node_names(\
%ids);
66 # print STDERR "Reading plain newick file $file...\n";
# NOTE(review): same two-argument bareword open as in the nexus branch.
67 open (F
, "<".$file) || die "Can't open file $file\n";
# Store the resulting newick expression on the object.
74 $self->set_tree_string($newick);
79 =head2 function determine_filetype()
82 Arguments: a filename, possibly including a path
83 Returns: "nexus" if the file is of type nexus
84 "newick" if the file contains a plain newick expression
# Classify a tree file by its content.
# Opens $filename for reading and dies if that fails.
# Returns "nexus" when $line matches /NEXUS/i, or "newick" when $line contains
# an opening parenthesis; the NEXUS test runs first.
90 sub determine_filetype
{
# NOTE(review): two-argument open on the bareword handle TEST; consider a
# three-argument open with a lexical handle, and include $! in the message.
93 open(TEST
, "<$filename") || die "Can't open file $filename ...";
# $line is presumably read from TEST -- the read is not visible in this
# excerpt. No close(TEST) is visible before these returns either; confirm.
96 if ($line =~ /NEXUS/i) { return "nexus"; }
97 if ($line =~ /\(/) { return "newick"; }
102 =head2 function get_tree_string()
# Accessor: returns whatever is stored under the object's tree_string key.
112 sub get_tree_string
{
114 return $self->{tree_string
};
117 =head2 function set_tree_string()
# Mutator: stores the next argument under the object's tree_string key.
127 sub set_tree_string
{
129 $self->{tree_string
}=shift;
132 =head2 function get_file_type()
# Return whatever is stored under the object's file_type key.
144 return $self->{file_type
};
147 =head2 function set_file_type()
# Store the next argument under the object's file_type key.
159 $self->{file_type
}=shift;
162 =head2 function get_node_names()
# Return whatever is stored under the object's node_names key.
174 return $self->{node_names
};
177 =head2 function set_node_names()
# Store the next argument under the object's node_names key.
189 $self->{node_names
}=shift;
192 =head2 function get_tree()
196 Returns: a tree object
# Feed the stored tree string to a CXGN::Phylo::Parse_newick parser and keep
# the parse result under the object's tree key.
204 my $tree_parser=CXGN
::Phylo
::Parse_newick
->new($self->get_tree_string());
205 $self->{tree
}=$tree_parser->parse();
206 # if it was a nexus file, replace the node names with the actual
207 # names which are available through get_node_names().
# For each key of the hash returned by get_node_names(), look up the node
# that currently carries that key as its name and rename it to the mapped
# value.
209 my $trans_hash_ref = $self->get_node_names();
210 foreach my $k (keys %$trans_hash_ref) {
211 my $node = $self->{tree
}->get_node_by_name($k);
# NOTE(review): if get_node_by_name() can return undef this call would die;
# a guard may exist on a line not visible in this excerpt -- confirm.
213 $node->set_name($$trans_hash_ref{$k});
# Return the tree held under the tree key.
216 return $self->{tree
};