Merge pull request #5205 from solgenomics/topic/generic_trial_upload
[sgn.git] / lib / CXGN / SNPsIO.pm
blobc48c9d2d406707fa50d277e2379e72c18086e5a2
2 =head1 NAME
4 CXGN::SNPsIO;
6 =head1 DESCRIPTION
8 =head1 AUTHOR
10 =cut
12 package CXGN::SNPsIO;
14 use Moose;
16 use Data::Dumper;
17 use IO::File;
18 use CXGN::Genotype::SNP;
20 has 'file' => ( isa => 'Str',
21 is => 'rw',
22 required => 1,
25 has 'fh' => (isa => 'FileHandle',
26 is => 'rw',
29 has 'header' => ( isa => 'Str',
30 is => 'rw',
33 has 'accessions' => ( isa => 'ArrayRef',
34 is => 'rw',
37 has 'ignore_accessions' => (isa => 'HashRef',
38 is => 'rw',
41 has 'valid_accessions' => (isa => 'ArrayRef',
42 is => 'rw',
45 has 'filter' => ( isa => 'Str',
46 is => 'rw',
51 sub BUILD {
52 my $self = shift;
53 my $args = shift;
55 my $fh = IO::File->new($args->{file});
57 while (<$fh>) {
58 chomp;
60 if (m/^\#\#/) {
61 next;
64 if (m/^\#CHROM/) {
65 chomp($_);
66 #print STDERR "found header $_\n";
67 $self->header($_);
68 # CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SNPS..
69 my @fields = split /\s+/, $_;
70 chomp(@fields);
71 my @accessions = @fields[9..$#fields];
72 $self->accessions(\@accessions);
73 print STDERR "ACCESSIONS: ".join("|", @accessions)."\n";
74 last;
77 $self->fh($fh);
80 sub next_line {
81 my $self = shift;
83 my $line = "";
84 my $fh = $self->fh();
85 if ($line = <$fh>) {
86 chomp($line);
87 return $line;
89 else {
90 if (! $self->header() ) {
91 die "Header not seen. This file may have format problems.\n";
93 return undef;
97 sub next {
98 my $self = shift;
99 my $line = $self->next_line();
101 if (defined($line)) {
103 my ($chr, $position, $snp_id, $ref_allele, $alt_allele, $qual, $filter, $info, $format, @snps) = split /\t/, $line;
104 #print STDERR "Processing $snp_id\n";
105 my $snps = CXGN::SNPs->new( );
106 $snps->raw($line);
107 $snps->id($snp_id);
108 $snps->chr($chr);
109 $snps->position($position);
110 $snps->ref_allele($ref_allele);
111 $snps->alt_allele($alt_allele);
112 $snps->qual($qual);
113 $snps->filter($filter);
114 my $depth = $info;
115 $depth =~ s/DP=(\d+)/$1/i;
116 if ($depth eq ".") { $depth=0; }
117 $snps->depth($depth);
118 $snps->info($info);
119 $snps->format($format);
120 $snps->accessions($self->accessions());
121 $snps->valid_accessions($self->accessions());
122 #print STDERR "ACCESSIONS: ".(join ",", @{$self->{accessions}})."\n";
123 my %snp_objects = ();
124 for(my $n=0; $n<@snps; $n++) {
125 my $snp = CXGN::Genotype::SNP->new( { id=>$snp_id, format => $format, vcf_string => $snps[$n] } );
127 my $accession = $self->accessions()->[$n]."\n";
128 chomp($accession);
129 #print "ACCESSION: $accession\n";
130 #if ($accession =~ m/1002:250060174/) { print STDERR "Saw accession 1002:250060174" }
131 $snp->accession($accession);
132 $snp_objects{$accession} = $snp;
134 #print STDERR Dumper(\%snp_objects);
135 $snps->snps(\%snp_objects);
136 return $snps;
138 else {
139 #print STDERR "LINE NOT DEFINED.\n";
140 return undef;
144 sub close {
145 my $self = shift;
146 close($self->fh());
149 sub total_accessions {
150 my $self = shift;
151 return scalar(@{$self->accessions});
155 __PACKAGE__->meta->make_immutable;