7 Program: seperateScaff.pl
13 Contact: soap@genomics.cn
15 This script is used to seperate singletons from scaffolds in *.scafSeq file
16 output by SOAPdenovo. There are two new output files as follow:
18 1) *.scafSeq.scaffold, containing scaffolds only.
20 2) *.scafSeq.singleton, containing singletons only.
22 User can set the minimun length of sequence to output.
26 perl seperateScaff.pl [options] >seperateScaff.log
28 --scafSeq *.scafSeq file output by SOAPdenovo, required
30 --scafLen minumum length of scaffold to output, default 0
32 --sigtLen minumum length of singleton to output, default 0
34 -h output help information
38 die `pod2text $0` if (@ARGV < 1);
40 my ($scafLen, $sigtLen) = (0, 0);
43 "scafSeq:s"=>\
$scafSeq,
44 "scafLen:i"=>\
$scafLen,
45 "sigtLen:i"=>\
$sigtLen,
49 die `pod2text $0` if ($help);
51 print "\n[Error] $scafSeq does not exist.\n\n";
56 print "\n[Warning] Set minimum length of scaffold is negative, program will use 0 instead.\n\n";
61 print "\n[Warning] Set minimum length of singleton is negative, program will use 0 instead.\n\n";
65 print "Command:\n perl $0 --scafSeq $scafSeq --scafLen $scafLen --sigtLen $sigtLen\n\n";
67 my ($gotScaf, $gotSigt, $id, $seq, $len) = (0, 0, "", "", 0);
68 my ($totalScafNum, $totalScafLen, $totalSigtNum, $totalSigtLen) = (0, 0, 0, 0);
69 my ($outputScafNum, $outputScafLen, $outputSigtNum, $outputSigtLen) = (0, 0, 0, 0);
70 open SCAF
, ">$scafSeq.scaffold" or die "Can't open output file: $scafSeq.scaffold\n";
71 open SIGT
, ">$scafSeq.singleton" or die "Can't open output file: $scafSeq.singleton\n";
72 open IN
, "$scafSeq" or die "Can't open input file: $scafSeq\n";
80 if ($_ =~ /^>scaff/) {
91 $len += length ($_) - 1;
99 print "Total scaffold number: $totalScafNum\nTotal scaffold length: $totalScafLen\nNumber of scaffold no shorter than $scafLen: $outputScafNum\nLength of scaffold no shorter than $scafLen: $outputScafLen\n\n";
100 print "Total singleton number: $totalSigtNum\nTotal singleton length: $totalSigtLen\nNumber of singleton no shorter than $sigtLen: $outputSigtNum\nLength of singleton no shorter than $sigtLen: $outputSigtLen\n";
102 #################################
103 ########## Sub Routine ##########
104 #################################
109 $totalScafLen += $len;
110 if ($len >= $scafLen) {
112 $outputScafLen += $len;
113 print SCAF
"$id$seq";
116 elsif ($gotSigt == 1) {
118 $totalSigtLen += $len;
119 if ($len >= $sigtLen) {
121 $outputSigtLen += $len;
122 print SIGT
"$id$seq";