5 gen_accession_jbrowse.pl - creates jbrowse instances that include base tracks and accession vcf tracks for each accession name supplied
9 gen_accession_jbrowse.pl -v [absolute path through base instance to dir containing vcfs] -b [absolute path to base instance] -d [name of database to search for trials] -h [db host]
11 =head1 COMMAND-LINE OPTIONS
13 -v absolute path through base instance to dir containing vcfs
14 -b absolute path to base instance
20 Takes a directory containing individual vcf files, including filtered and imputed versions, and creates or updates jbrowse instances by symlinking to all necessary files in a base instance, then generating a uniq tracks.conf file and appending dataset info to jbrowse.conf. Should be run from /data/json/accessions dir in jbrowse instance.
24 Bryan Ellerbrock (bje24@cornell.edu) - Oct 2015
31 use Bio
::Chado
::Schema
;
32 use CXGN
::DB
::InsertDBH
;
34 our ($opt_v, $opt_b, $opt_h, $opt_d);
36 #-----------------------------------------------------------------------
37 # define paths & array of vcf_file names. Open file handles to read accession lists and append datasets to jbrowse.conf
38 #-----------------------------------------------------------------------
42 my $vcf_dir_path = $opt_v;
45 my $link_path = $opt_b;
46 my $url_path = $opt_v;
47 $url_path =~ s
:$link_path/(.+):$1:;
48 print STDERR
"url path = $url_path \n";
50 my ($file_type,$out,$header,@tracks,$imp_track,$filt_track,$h,$q,$dbh);
51 my @files = ` dir -GD -1 --hide *.tbi $vcf_dir_path ` ;
53 open (CONF
, ">>", "../../../jbrowse.conf") || die "Can't open conf file jbrowse.conf!\n";
55 open (LOG
, ">>", "gen_accession.log") || die "Can't open log file!\n";
57 #-----------------------------------------------------------------------
58 # connect to database and extract unique acccesion names
59 #-----------------------------------------------------------------------
61 #my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() });
63 $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
65 dbargs
=> {AutoCommit
=> 0,
71 $q = "SELECT distinct(stock.stock_id), stock.uniquename from stock where type_id = 76392 order by stock.uniquename";
77 #-----------------------------------------------------------------------
78 # for each accession name, locate matching indiv vcf files and construct necessary text for tracks.conf
79 #-----------------------------------------------------------------------
81 #print STDERR "fetchrow array = $h->fetchrow_array \n";
83 while (my ($id, $name) = $h->fetchrow_array) {
86 print LOG
"id = $id and name = $name \n";
87 $out = "$id/tracks.conf";
88 $header = "[general]\ndataset_id = Accession_$id\n";
90 for my $file (@files) {
93 next if !m/filtered/ && !m/imputed/;
94 $_ =~ s/([^.]+)_2015_V6.*/$1/s;
95 next if ($_ ne $name);
96 print STDERR
"Matched vcf file basename $_ to accession name $name !\n";
97 print LOG
"Matched vcf file basename $_ to accession name $name !\n";
99 $file_type =~ s/.+_([imputedflr]+).vcf.gz/$1/s;
100 print LOG
"filetype = $file_type \n";
101 if ($file_type eq 'filtered') { #Handle filtered vcf files
102 print LOG
"Working on filtered file $file \n" ;
103 print STDERR
"Working on filtered file $file \n" ;
104 my $path = $url_path . "/" . $file ;
106 $key =~ s/(.+).vcf.gz/$1/s;
107 print LOG
"Key = $key \n";
110 [ tracks . ' . $key . ' ]
111 hooks.modify = function( track, feature, div ) { div.style.backgroundColor = track.config.variantIsHeterozygous(feature);}
112 variantIsHeterozygous = function( feature ) {
113 var genotypes = feature.get(\'genotypes\');
114 for( var sampleName in genotypes ) {
116 var gtString = genotypes[sampleName].GT.values[0];
117 if( ! /^1([\|\/]1)*$/.test( gtString) && ! /^0([\|\/]0)*$/.test( gtString ) )
121 if( /^1([\|\/]1)*$/.test( gtString) )
125 storeClass = JBrowse/Store/SeqFeature/VCFTabix
126 urlTemplate = ' . $path .'
127 category = Diversity/NEXTGEN/Unimputed
128 type = JBrowse/View/Track/HTMLVariants
129 metadata.Description = Homozygous reference: Green Heterozygous: Red Homozygous alternate: Blue Filtered to remove SNPs with a depth of 0 and SNPs with 2 or more alt alleles.
130 metadata.Link = <a href=ftp://cassavabase.org/jbrowse/diversity/igdBuildWithV6_hapmap_20150404_vcfs/' . $file . '>Download VCF File</a>
131 metadata.Provider = NEXTGEN Cassava project
132 metadata.Accession = ' . $name . '
135 push @tracks, $filt_track;
137 } elsif ($file_type eq 'imputed') { #Handle imputed vcf files
139 print STDERR
"Working on imputed file $file \n" ;
140 print LOG
"Working on imputed file $file \n" ;
141 my $path = $url_path . "/" . $file ;
143 $key =~ s/(.+).vcf.gz/$1/s;
144 print LOG
"Key = $key \n";
147 [ tracks . ' . $key . ' ]
148 hooks.modify = function( track, feature, div ) { div.style.backgroundColor = track.config.variantIsHeterozygous(feature); div.style.opacity = track.config.variantIsImputed(feature) ? \'0.33\' : \'1.0\'; }
149 variantIsHeterozygous = function( feature ) {
150 var genotypes = feature.get(\'genotypes\');
151 for( var sampleName in genotypes ) {
153 var gtString = genotypes[sampleName].GT.values[0];
154 if( ! /^1([\|\/]1)*$/.test( gtString) && ! /^0([\|\/]0)*$/.test( gtString ) )
158 if( /^1([\|\/]1)*$/.test( gtString) )
161 variantIsImputed = function( feature ) {
162 var genotypes = feature.get(\'genotypes\');
163 for( var sampleName in genotypes ) {
165 var dpString = genotypes[sampleName].DP.values[0];
166 if( /^0$/.test( dpString) )
173 storeClass = JBrowse/Store/SeqFeature/VCFTabix
174 urlTemplate = ' . $path .'
175 category = Diversity/NEXTGEN/Imputed
176 type = JBrowse/View/Track/HTMLVariants
177 metadata.Description = Homozygous reference: Green Heterozygous: Red Homozygous alternate: Blue Values imputed with GLMNET are shown at 1/3 opacity.
178 metadata.Link = <a href=ftp://cassavabase.org/jbrowse/diversity/igdBuildWithV6_hapmap_20150404_vcfs/' . $file . '>Download VCF File</a>
179 metadata.Provider = NEXTGEN Cassava project
180 metadata.Accession = ' . $name . '
183 push @tracks, $imp_track
192 #-----------------------------------------------------------------------
193 # if matching vcf files were not found, skip to next accession name
194 #-----------------------------------------------------------------------
196 next unless (@tracks);
198 #-----------------------------------------------------------------------
199 # unless it already exisits, create dir for new jbrowse instance and create symlinks to files in base jbrowse instance
200 #-----------------------------------------------------------------------
204 `sudo ln -sf $link_path/data_files $id/data_files`;
205 `sudo ln -sf $link_path/seq $id/seq`;
206 `sudo ln -sf $link_path/tracks $id/tracks`;
207 `sudo ln -sf $link_path/trackList.json $id/trackList.json`;
208 `sudo ln -sf $link_path/readme $id/readme`;
209 `sudo touch $id/tracks.conf && sudo chmod a+wrx $id/tracks.conf`;
211 #-----------------------------------------------------------------------
212 # also append dataset info to jbrowse.conf, and create and populate accession specific tracks.conf file
213 #-----------------------------------------------------------------------
215 my $dataset = "[datasets.Accession_$id]\n" . "url = ?data=data/json/accessions/$id\n" . "name = $name\n\n";
217 open (OUT
, ">", $out) || die "Can't open out file $out! \n";
219 print OUT
join ("\n", @tracks), "\n";
223 #-----------------------------------------------------------------------
224 # if we are just adding a new track, append to tracks.conf
225 #-----------------------------------------------------------------------
227 open (OUT
, ">>", $out) || die "Can't open out file $out! \n";
228 print OUT
join ("\n", @tracks), "\n";