4 CXGN::Phenotypes::File - a class to parse out information from the files that phenotype searching returns
10 Lukas Mueller <lam87@cornell.edu>
14 package CXGN
::Phenotypes
::File
;
18 use File
::Slurp qw
| slurp
|;
# Path of the phenotype file; it is passed to slurp() when the file is parsed.
has file => (
    isa => 'Str',
    is  => 'rw',
);

# Header names of the leading columns (indices 0 .. $FACTOR_COUNT of the
# header row), filled in by the parsing code.
has factors => (
    isa => 'ArrayRef',
    is  => 'rw',
);

# Header names of the columns that follow the factor columns, filled in by
# the parsing code.
has traits => (
    isa => 'ArrayRef',
    is  => 'rw',
);

# Per-column statistics keyed by header name. Each entry holds
#   fields   => { value => number of rows carrying that value }
#   distinct => number of different values seen in the column
has levels => (
    isa => 'HashRef',
    is  => 'rw',
);

# When true (the default), one pair of enclosing double quotes is stripped
# from every header name and every field value while parsing.
has remove_quotes => (
    isa     => 'Bool',
    is      => 'rw',
    default => sub { 1 },
);

our $FACTOR_COUNT = 38; # number of columns in the file before traits columns start
34 my @lines = slurp
($self->file());
36 my $header = $lines[0];
39 my @keys = split("\t", $header);
41 if ($self->remove_quotes()) {
42 foreach my $k (@keys) {
43 #print STDERR "Removing quotes from $k...";
44 $k=~ s/^\"(.*)\"$/$1/;
45 #print STDERR "Now $k...\n";
54 for (my $i=1; $i<@lines; $i++) {
55 my @fields = split /\t/, $lines[$i];
56 for(my $n=0; $n <@keys; $n++) {
57 if ($self->remove_quotes()) {
58 #print STDERR "Removing quotes from $fields[$n]...";
59 $fields[$n]=~ s/^\"(.*)\"$/$1/;
60 #print STDERR "Now $fields[$n]...\n";
63 if (exists($fields[$n]) && defined($fields[$n])) {
64 $line{$keys[$n]}=$fields[$n];
66 $levels{$keys[$n]}->{fields
}->{$fields[$n]}++;
67 $levels{$keys[$n]}->{distinct
} = scalar(keys(%{$levels{$keys[$n]}->{fields
}}));
# Publish the parsed header: the leading columns are factors, the remaining
# columns are traits, and %levels carries the per-column value statistics.
#
# NOTE(review): indices 0 .. $FACTOR_COUNT are taken as factors, i.e.
# $FACTOR_COUNT + 1 columns, while the comment on $FACTOR_COUNT describes it
# as a column count - confirm the intended split against the file layout.
my $last_key    = $#keys;
my $last_factor = $FACTOR_COUNT < $last_key ? $FACTOR_COUNT : $last_key;

# Clamp to the real header width so a short header does not pad the factor
# list with undef entries.
$self->factors( [ @keys[ 0 .. $last_factor ] ] );

# Stop at the last valid index. The previous upper bound, scalar(@keys), is
# one past the end of the array and appended an undef entry to the traits.
$self->traits( [ @keys[ $FACTOR_COUNT + 1 .. $last_key ] ] );

$self->levels( \%levels );
80 sub distinct_levels_for_factor
{
84 #print STDERR "LEVELS: ".Dumper($self->levels());
85 return $self->levels()->{$factor}->{distinct
};