Merge pull request #3987 from solgenomics/topic/description_to_text
[sgn.git] / bin / jbrowse_vcf_tools / generate_indiv_vcfs.sh
blobfb6601dd4c798757121492bfa4da28e2e61a062c
1 #!/bin/bash
2 #-------------------------------------------------------------------------------------------------------------------------------
3 # NAME
5 # generate_indiv_vcfs.sh
6 #
7 # SYNOPSIS
8 # Shell script for creating multiple versions of individual VCF files from a single multivcf and imputed dosage file.
9 #
10 # ./generate_indiv_vcfs.sh -v [multivcf file] -d [dosage file]
12 # To run, this script requires create_indiv.pl
13 # finish_indiv.pl
14 #-------------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------
# 1 Parse command line arguments:
#
#   -v | --multi.vcf  <file>   multivcf file   -> MULTI_VCF
#   -d | --dosage     <file>   dosage file     -> DOSAGE
#
# Arguments that match neither option are skipped. Both options are
# mandatory; the script exits with status 1 if either one is missing.
#--------------------------------------------------------------------------------

# Numeric comparison: inside [[ ]] the '>' operator compares strings
# lexicographically, so use arithmetic evaluation instead. An option needs a
# value after it, hence the loop only runs while at least two words remain.
while (( $# > 1 )); do
  key="$1"

  case "$key" in
    -v|--multi.vcf)
      MULTI_VCF="$2"
      shift # drop the value; the shift after esac drops the flag itself
      ;;
    -d|--dosage)
      DOSAGE="$2"
      shift # drop the value so it is not re-examined as an option key
      ;;
  esac
  shift
done

printf 'MULTI_VCF = %s\n' "$MULTI_VCF"
printf 'DOSAGE = %s\n' "$DOSAGE"

if [[ -z "$MULTI_VCF" || -z "$DOSAGE" ]]; then
  # Diagnostics go to stderr, and the non-zero status lets callers detect
  # the usage error (a bare 'exit' would return echo's status, i.e. 0).
  echo "Trouble reading command line arguments, make sure
-v [multi vcf file] and
-d [dosage file] are both specified" >&2
  exit 1
fi
#----------------------------------------------------------------------------------
# 2 create a nearly empty vcf file for each accession in the multi-vcf
#
# Runs ./create_indiv.pl (looked up in the current working directory) with the
# multivcf file as input and ./output as the destination directory.
#----------------------------------------------------------------------------------

echo Creating starter vcf files...

# -p keeps re-runs quiet when ./output already exists; a real failure
# (e.g. no write permission) stops the script instead of being ignored.
mkdir -p output || exit 1

# Quote the path so file names containing spaces or glob characters reach the
# Perl script as a single argument. Stop here if the starter files could not
# be created, since the next step works on the contents of ./output.
./create_indiv.pl -v "$MULTI_VCF" -o output || exit 1
#--------------------------------------------------------------------------------
# 3 add genotype data to complete indiv vcf files, then generate filt and
#   imputed files too. These are long operations, so they are run in parallel
#   (up to 30 jobs at once, via GNU parallel) to speed things up.
#
# ./finish_indiv.pl is invoked once per entry in ./output:
#   ./finish_indiv.pl -v <multivcf> -d <dosage> -f <entry>
#--------------------------------------------------------------------------------

# GNU parallel passes the command string to a shell, so the two user-supplied
# paths are shell-quoted first; otherwise a space or metacharacter in either
# path would split or alter the command. parallel quotes the {} replacement
# on its own.
printf -v multi_vcf_quoted '%q' "$MULTI_VCF"
printf -v dosage_quoted '%q' "$DOSAGE"

# Feed the entries NUL-delimited straight from the glob rather than parsing
# 'ls' output: this is safe for any file name and never lists the contents of
# a sub-directory. The -e test skips the literal pattern when nothing matches,
# so an empty ./output simply starts no jobs.
for indiv_file in output/*; do
  [[ -e "$indiv_file" ]] || continue
  printf '%s\0' "$indiv_file"
done \
  | parallel -0 -j 30 --gnu --verbose \
    "./finish_indiv.pl -v $multi_vcf_quoted -d $dosage_quoted -f {}"