7 our ($opt_v, $opt_d, $opt_f);
11 my $raw_vcf_file = $opt_v;
12 my $dosage_file = $opt_d;
13 my $starting_file = $opt_f;
19 my $this_rep = $starting_file;
20 $this_rep =~ s/^.+rep(\d+).vcf.+$/$1/;
21 print STDERR
"this rep = $this_rep \n";
22 if (length($this_rep) != 1) {
25 print STDERR
"this rep = $this_rep \n";
27 #-----------------------------------------------------------------------
28 # finish indiv vcf file
29 #-----------------------------------------------------------------------
31 my $vcf_file = $starting_file;
32 $vcf_file =~ s/^(.+)_(\d+)$/$1/;
33 print STDERR
"finished filename = $vcf_file \n";
35 my $accession_name = $starting_file;
36 $accession_name =~ s
:^output
/(.+)_2015_V6
.+:$1:;
37 print STDERR
"accession name = $accession_name \n";
39 my $column_number = $starting_file;
40 $column_number =~ s/^(.+)_(\d+)$/$2/;
41 print STDERR
"column number = $column_number \n";
43 system ("cut -f 1-9,$column_number $raw_vcf_file | tail -n +15761 >> $starting_file");
44 system ("mv $starting_file $vcf_file");
45 print "$vcf_file finished!\n";
47 #-----------------------------------------------------------------------
48 # extract imputed SNP data pertaining to this accession from dosage file
49 #-----------------------------------------------------------------------
51 open (IMPS
, "<", $dosage_file) || die "Can't open $dosage_file!\n";
54 ($placeholder, @genotypes) = split /\t/;
55 unless ($placeholder eq $accession_name) {
59 if ($num_seen < $this_rep) {
64 open (VCF
, "<", $vcf_file) || die "Can't open $vcf_file!\n";
65 $imputed_file = $vcf_file;
66 $imputed_file =~ s/.vcf/_imputed.vcf/;
67 $filtered_file = $vcf_file;
68 $filtered_file =~ s/.vcf/_filtered.vcf/;
70 open (OUTFILE
, ">", $imputed_file) || die "Can't open $imputed_file!\n";
71 open (OUTFILE2
, ">", $filtered_file) || die "Can't open $filtered_file!\n";
79 my ($CHROM, $POS, $ID, $REF, $ALT, $QUAL, $FILTER, $INFO, $FORMAT, $DATA) = split /\t/;
80 if (length($ALT) > 1) {
86 print OUTFILE
join "\t", $CHROM, $POS, $ID, $REF, $ALT, $QUAL, $FILTER, $INFO, $FORMAT, $DATA;
88 print OUTFILE2
join "\t", $CHROM, $POS, $ID, $REF, $ALT, $QUAL, $FILTER, $INFO, $FORMAT, $DATA;
92 $_ = shift @genotypes;
94 $DATA =~ s/\.\/\./0\
/0/;
95 } elsif ($_ >= 0.90 && $_ <= 1.10) {
96 $DATA =~ s/\.\/\./0\
/1/;
97 } elsif ($_ >= 1.90) {
98 $DATA =~ s/\.\/\./1\
/1/;
102 print OUTFILE
join "\t", $CHROM, $POS, $ID, $REF, $ALT, $QUAL, $FILTER, $INFO, $FORMAT, $DATA;
115 #----------------------------------------------------
116 # Report a completed accession and compress files
117 #----------------------------------------------------
119 print STDERR
"$vcf_file filtered and imputed files finished\n";
120 system ( "bgzip $vcf_file" );
121 system ( "bgzip $imputed_file" );
122 system ( "bgzip $filtered_file" );
123 system ( "tabix -p vcf $vcf_file.gz" );
124 system ( "tabix -p vcf $imputed_file.gz" );
125 system ( "tabix -p vcf $filtered_file.gz" );