limit fstBC to 30bp in Python3 ver.
[GalaxyCodeBases.git] / BGI / SOAPdenovo2 / standardPregraph / seperateScaff.pl
blobbada39bde3c11ca248a9ade9a1604eaf269c480f
1 #!/usr/bin/perl -w
2 use strict;
3 use Getopt::Long;
5 =head1 Description
7 Program: seperateScaff.pl
9 Author: BGI-Shenzhen
11 Version: 1.0
13 Contact: soap@genomics.cn
15 This script is used to seperate singletons from scaffolds in *.scafSeq file
16 output by SOAPdenovo. There are two new output files as follow:
18 1) *.scafSeq.scaffold, containing scaffolds only.
20 2) *.scafSeq.singleton, containing singletons only.
22 User can set the minimun length of sequence to output.
24 =head1 Usage
26 perl seperateScaff.pl [options] >seperateScaff.log
28 --scafSeq *.scafSeq file output by SOAPdenovo, required
30 --scafLen minumum length of scaffold to output, default 0
32 --sigtLen minumum length of singleton to output, default 0
34 -h output help information
36 =cut
38 die `pod2text $0` if (@ARGV < 1);
39 my ($scafSeq, $help);
40 my ($scafLen, $sigtLen) = (0, 0);
42 GetOptions (
43 "scafSeq:s"=>\$scafSeq,
44 "scafLen:i"=>\$scafLen,
45 "sigtLen:i"=>\$sigtLen,
46 "h"=>\$help
49 die `pod2text $0` if ($help);
50 if (! -f $scafSeq) {
51 print "\n[Error] $scafSeq does not exist.\n\n";
52 die `pod2text $0`;
55 if ($scafLen < 0) {
56 print "\n[Warning] Set minimum length of scaffold is negative, program will use 0 instead.\n\n";
57 $scafLen = 0;
60 if ($sigtLen < 0) {
61 print "\n[Warning] Set minimum length of singleton is negative, program will use 0 instead.\n\n";
62 $sigtLen = 0;
65 print "Command:\n perl $0 --scafSeq $scafSeq --scafLen $scafLen --sigtLen $sigtLen\n\n";
67 my ($gotScaf, $gotSigt, $id, $seq, $len) = (0, 0, "", "", 0);
68 my ($totalScafNum, $totalScafLen, $totalSigtNum, $totalSigtLen) = (0, 0, 0, 0);
69 my ($outputScafNum, $outputScafLen, $outputSigtNum, $outputSigtLen) = (0, 0, 0, 0);
70 open SCAF, ">$scafSeq.scaffold" or die "Can't open output file: $scafSeq.scaffold\n";
71 open SIGT, ">$scafSeq.singleton" or die "Can't open output file: $scafSeq.singleton\n";
72 open IN, "$scafSeq" or die "Can't open input file: $scafSeq\n";
73 while (<IN>) {
74 if (/^>/) {
75 handleOneSeq();
77 $id = $_;
78 $seq = "";
79 $len = 0;
80 if ($_ =~ /^>scaff/) {
81 $gotScaf = 1;
82 $gotSigt = 0;
84 elsif ($_ =~ /^>C/) {
85 $gotSigt = 1;
86 $gotScaf = 0;
89 else {
90 $seq .= $_;
91 $len += length ($_) - 1;
94 handleOneSeq();
95 close IN;
96 close SCAF;
97 close SIGT;
99 print "Total scaffold number: $totalScafNum\nTotal scaffold length: $totalScafLen\nNumber of scaffold no shorter than $scafLen: $outputScafNum\nLength of scaffold no shorter than $scafLen: $outputScafLen\n\n";
100 print "Total singleton number: $totalSigtNum\nTotal singleton length: $totalSigtLen\nNumber of singleton no shorter than $sigtLen: $outputSigtNum\nLength of singleton no shorter than $sigtLen: $outputSigtLen\n";
102 #################################
103 ########## Sub Routine ##########
104 #################################
106 sub handleOneSeq {
107 if ($gotScaf == 1) {
108 $totalScafNum++;
109 $totalScafLen += $len;
110 if ($len >= $scafLen) {
111 $outputScafNum++;
112 $outputScafLen += $len;
113 print SCAF "$id$seq";
116 elsif ($gotSigt == 1) {
117 $totalSigtNum++;
118 $totalSigtLen += $len;
119 if ($len >= $sigtLen) {
120 $outputSigtNum++;
121 $outputSigtLen += $len;
122 print SIGT "$id$seq";