limit fstBC to 30bp in Python3 ver.
[GalaxyCodeBases.git] / perl / bsvf / bsdbg
blobc8ec0c9388fe07aa14b9ea197e2cbbd626c68e56
#!/usr/bin/env perl
# bsdbg - debugging front-end.
#
# Usage: bsdbg <command> Human=hg19.fa,Virus=hbv.fa,bam=Sorted.bam,out=./test/
#
# Builds a minimal project INI (<out>/prj.ini) from the comma-separated
# key=value string given on the command line, hard-links the BAM file to
# <out>/Project1_aln/Test.bam, always runs the `prepare` handler, and then
# runs the requested command handler (if it is not `prepare` itself).
#
# NOTE(review): do_pre, do_grep, do_analyse, getFilesHash and basename are
# not defined in this file; presumably they are brought into package main by
# BSuitLib -- confirm before moving the `require`.
use strict;
use warnings;
use Data::Dump qw(ddx);

use FindBin qw($RealBin);
if ($FindBin::VERSION < 1.51) {
    warn "[!]Your Perl is too old, thus there can only be ONE `bsuit` file in your PATH. [FindBin Version: $FindBin::VERSION < 1.51]\n\n"
}
FindBin::again();
use lib "$RealBin/lib";
use File::Path ();    # make_path() is called by its fully qualified name below
require BSuitLib;
use Galaxy::IO::INI;

# Dispatch table: lower-cased command name => handler.
my %Cmd2Fun = (
    'prepare' => \&do_pre,
    'grep'    => \&do_grep,
    'analyse' => \&do_analyse,
);

# Print the list of known commands and terminate the program (never returns).
sub ShowHelp {
    die '[!]Available Commands are: [',join('],[',sort keys %Cmd2Fun),"], case insensitive.\n";
}

if (@ARGV < 2) {
    warn "Usage: $0 <command> Human=hg19.fa,Virus=hbv.fa,bam=Sorted.bam,out=./test/\n";
    ShowHelp();
}

my $cmd    = lc shift;
my $cfgdat = shift;
#die "[!]Config File [$cfgfile] NOT found !\n" unless -f $cfgfile;

# Reject unknown commands before anything is written to disk.
unless (exists $Cmd2Fun{$cmd}) {
    warn "[x]Unknown Command: [$cmd] !\n";
    ShowHelp();
}

# Recognised keys (case insensitive) and their defaults.
my %CFG = (
    human => '',
    virus => '',
    bam   => '',
    out   => './test'
);
for my $pair ( split /\,/, $cfgdat ) {
    # Limit 2 keeps any '=' that is part of the value (e.g. in a path).
    my ($id, $val) = split /=/, $pair, 2;
    # Skip empty or malformed tokens instead of storing undef.
    next unless defined $id and defined $val;
    $id = lc $id;
    next unless exists $CFG{$id};
    $CFG{$id} = $val;
}

die "[!]Bam File [$CFG{'bam'}] NOT found !\n" unless -f $CFG{'bam'};
my $cfgfile = $CFG{'out'} . '/prj.ini';
my $thedir  = $CFG{'out'} . '/Project1_aln';
unless (-d $thedir) {
    File::Path::make_path($thedir,{verbose => 0,mode => 0755});
}

# Expose the BAM under the fixed name Test.bam inside the project directory.
# A failure is reported but is not fatal; an already existing target is
# left untouched.
my $bamlink = $thedir.'/Test.bam';
unless (-e $bamlink) {
    link $CFG{'bam'}, $bamlink
        or warn "[!]Cannot link [$CFG{'bam'}] to [$bamlink]: $!\n";
}

# Write the generated project INI; fail loudly if it cannot be written,
# because it is read back a few lines further down.
open my $ini_fh, '>', $cfgfile
    or die "[x]Cannot write [$cfgfile]: $!\n";
print {$ini_fh} "
[RefFiles]
HostRef=$CFG{'human'}
VirusRef=$CFG{'virus'}

[DataFiles]
Test.1=xx_1.fq
Test.2=xx_2.fq

[InsertSizes]
Test=500
Test.SD=50

[Output]
WorkDir=$CFG{'out'}
ProjectID=Project1
";
close $ini_fh
    or die "[x]Cannot close [$cfgfile]: $!\n";

# Package globals below are declared with `our` so that code outside this
# file can see them. NOTE(review): presumably consumed by BSuitLib -- confirm.
our $DISABLE_REF_INDEX=1;

#our $CFGminMAPQ = 30;
our $minHostDepth = 30;
our $minSoftClip = 10;
our $methly3BaseErrRate = 0.08;
our $DEVELOP = 0;
our $DEBUG = 1;

our $idbacmd = '--max_gap 100 --min_region 31 --min_contig 31 --mink 17 --maxk 89 --step 20';
our $PathPrefix = "PATH=\"$RealBin/bin:\$PATH\";";

our $Config = Galaxy::IO::INI->new();
$Config->read($cfgfile);
#ddx \$Config;

warn "[!]Runing $0 [$cmd].\n";

our $RootPath = $Config->{'Output'}->{'WorkDir'};
$RootPath =~ s/[\/\\]+$//g;    # strip trailing slashes / backslashes
our $ProjectID = $Config->{'Output'}->{'ProjectID'};
warn "[!] Working on: [$ProjectID] @ [$RootPath]\n";

our $HostRefName = basename($Config->{'RefFiles'}->{'HostRef'});
our $VirusRefName = basename($Config->{'RefFiles'}->{'VirusRef'});
our $RefFilesSHA = getFilesHash($HostRefName,$VirusRefName);

our $RefConfig = Galaxy::IO::INI->new();
our (%RefChrIDs,%VirusChrIDs);

# The INI above is regenerated on every run, so `prepare` is always run first.
$Cmd2Fun{'prepare'}($cfgfile);

if ($cmd ne 'prepare') {
    if ( -f "$RootPath/Ref/Ref.ini" ) {
        $RefConfig->read("$RootPath/Ref/Ref.ini");
    } else {die "[x] Prepare INI not found ! [$RootPath/Ref/Ref.ini]\n";}
    my @RefChrID = split(',',$RefConfig->{$RefFilesSHA}->{'RefChrIDs'});
    my @VirusChrID = split(',',$RefConfig->{$RefFilesSHA}->{'VirusChrIDs'});
    for (@RefChrID) {
        $RefChrIDs{$_} = $RefConfig->{$RefFilesSHA}->{$_};
    }
    $VirusChrIDs{$_} = $RefConfig->{$RefFilesSHA}->{$_} for @VirusChrID;

    # `prepare` has already been run above, so only other commands are
    # dispatched here (avoids running `prepare` twice).
    $Cmd2Fun{$cmd}($cfgfile);    # If use `do $Cmd2Fun{$cmd}();`, the sub MUST return 1;
}
warn "[!]done !\n";

# overlap = (min(end_mate1,end_mate2) - max(start_mate1,start_mate2)) http://sourceforge.net/p/bio-bwa/mailman/message/27028596/

# rsync -Lav --existing . ~/git/BS-viral-inte/