7 E-mail: luoruibang@genomics.org.cn
9 This utility if a component of SOAP.
28 #include <boost/lexical_cast.hpp>
29 #include <boost/progress.hpp>
30 #include <boost/regex.hpp>
31 #include <boost/unordered_set.hpp>
32 #include <boost/unordered_map.hpp>
34 #include "threadmanager.h"
37 //Namespace Declaration
39 using namespace boost
;
42 #define DEPTH_SINGLE_LENGTH_PER_ROW 100 //Columns per row in -depthsingle
43 #define WINDOW_SIZE_LOWER_LIMIT 100 //The lowest threshold of window_size
46 #define DB(...) if(DEBUG) cerr<<"Debug:"<<'\t'<<__FILE__<<'\t'<<__LINE__<<'\t'<<(__VA_ARGS__)<<end;
50 typedef igzstream imethod
;
53 #define PROGRAM_NAME "SOAP.coverage"
54 #define PROGRAM_COMPILE_DATE __DATE__
55 #define PROGRAM_COMPILE_TIME __TIME__
57 // Define version number
58 #define VERSION_MAJOR 2
59 #define VERSION_MINOR 7
60 #define VERSION_PATCHLEVEL 9
61 #define VERSION_STRING "2.7.9"
63 // Macros dealing with VERSION
64 #define VERSION_NUM(a,b,c) (((a) << 16L) | ((b) << 8) | (c))
66 VERSION_NUM(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCHLEVEL)
69 "This utility can calculate sequencing coverage or physical coverage as well as duplication rate\n"\
70 "and details of specific block for each segments and whole genome by using SOAP, Blat, Blast, BlastZ,\n"\
71 "mummer and MAQ aligement results with multi-thread. Gzip file supported.\n"
75 " -cvg or -phy or -tag Selector for sequencing coverage mode, physical coverage mode or reads tag mode\n"\
76 " At least and only one should be selected!\n"\
77 " -refsingle [filename] Input reference fasta file used in SOAP\n"\
78 " -i [soap-file1 soap-file2 ...]\n"\
79 " Input several soap or soap gziped results by filenames.\n"\
80 " -il [soap-list] Input several soap or soap gziped results (absolute path!) with a soap-list file\n"\
81 " Caution: Only PE aligned results can be used in physical coverage!\n"\
82 " -il_single [SE aligned results list]\n"\
83 " -il_soap [PE aligned results list]\n"\
84 " -o [file-name] Results output with details\n"\
85 " -depth [directory] Output coverage of each bp in seperate files, a directory should be given\n"\
86 " -depthsingle [filename] Output coverage of each bp in a single file (text, fasta like)\n"\
87 " -nospace No space between plain depthsingle site depths\n"\
88 " -depthsinglebin [fn] Output coverage of each bp in a single file (Binary mode)\n"\
89 " -addn [filename] Input N block data for exclusion (marked as 65535 in depthsingle output)\n"\
90 " Input format: <segment_name> <start (leftmost as 1)> <end (half close)>\n"\
91 " -depthinput [filename] Input previous coverage data (Both Text or Binary) for faster accumulation\n"\
92 " -depthinputsam [filename]\n"\
93 " Input previous coverage data from Samtools\n"\
94 " -cdsinput [filename] Input specific block range for calculating coverage\n"\
95 " Input format: <segment_name> <start (leftmost as 1)> <end (half close)>\n"\
96 " -plot [filename] [x-axis lower] [x-axis upper]\n"\
97 " Output overall distribution of coverage of all segments\n"\
98 " -cdsplot [filename] [x-axis lower] [x-axis upper]\n"\
99 " Output distribution of coverage of specific blocks\n"\
100 " -cdsdetail [filename]\n"\
101 " Output coverage of each bp of each specific blocks in a single file\n"\
102 " -window [filename] [length]\n"\
103 " Output coverage averaged in a [length] long window to [filename]\n"\
104 " -p [num] Number of processors [Default:4]\n"\
105 " -trim [num] Exclude [num] bp(s) from head & tail of each segments\n"\
106 " -precisionOffset [num] Set the precision offset [Default:2]\n"\
107 " -minimumDepth [num] Set the minimum depth to take into consideration [Default:0]\n"\
108 " -maximumDepth [num] Set the maximum depth to take into consideration [Default:65535]\n"\
110 "Input format seletors:\n"\
111 " -general [id column] [start position column] [end position column]\n"\
112 " -generallen [id column] [start position column] [len]\n"\
113 " Specify column's (leftmost 1 based, right half close) for necessary informations\n"\
114 " Warning: Inputs with /^#/ are recognized as comments and ignored\n"\
115 " -plain Input is a three column list\n"\
116 " -sam Input is a standard SAM input file\n"\
117 " -pslquery Input is Blat for alculating query coverage.\n"\
118 " -pslsub Input is Blat for calculating subject coverage.\n"\
119 " -maq Input is MAQ output file.\n"\
120 " -m8subject Input is Blast m8 file for calculating subject coverage (reference should be subject).\n"\
121 " -m8query Input is Blast m8 file for calculating query coverage (reference should be query).\n"\
122 " -mummerquery [limit] Input mummer result file for calculating query coverage.\n"\
123 " -axtoitg Input Blastz axt file for calculating target coverage.\n"\
124 " -axtoiq Input Blastz axt file for calculating query coverage.\n"\
125 " -qmap Input qmap of bisulfite sequencing.\n"\
127 "Special functions:\n"\
128 " -sp [filename_in] [filename_out]\n"\
129 " Output S/P ratio data for post processing.\n"\
131 " ref start end name\n"\
132 " -pesupport [filename_in] [filename_out]\n"\
133 " Output pair-end reads on specific areas.\n"\
135 " ref start end name\n"\
136 " -onlyuniq Use reads those are uniquely mapped (column 4 in soap == 1).\n"\
137 " -precise Omit mismatched bp in soap results.\n"\
138 " -nowarning Cancel all possible warning.\n"\
139 " -nocalc Do not perform depth calculation.\n"\
140 " -onlycover Only output 0 or 1 for coverage calculation.\n"\
142 "Physical Coverage Specified Parameters:\n"\
143 " -duplicate [num] Exclude duplications, and gives the percentage of duplication. [num]=readlength\n"\
144 " -insertupper [num] Insert larger than num will be abandon [Default: 15000]\n"\
145 " -insertlower [num] Insert shorter thab num will be abandon [Default: 0]\n"\
148 " 1. Calculate several files of SOAP results.\n"\
149 " soap.coverage -cvg -i *.soap *.single -refsingle human.fa -o result.txt \n"\
151 " 2. Calculate a list of SOAP results, exclude Ns blocks, output depth to\n"\
152 " a file and plot coverage form depth 0 to 1000.\n"\
153 " soap.coverage -cvg -il soap.list -refsingle human.fa -o result.txt -depthsingle all.coverage -addn n.gap -plot distribution.txt 0 1000\n"\
155 " 3. Calculate a list of SOAP results, use only uniquely mapped reads, exclude Ns blocks\n"\
156 " , output depth to a file and plot coverage form depth 0 to 1000.\n"\
157 " soap.coverage -cvg -il soap.list -onlyuniq -refsingle human.fa -o result.txt -depthsingle all.coverage -addn n.gap -plot distribution.txt 0 1000\n"\
159 " 4. Add new SOAP results to depth(-depthsingle) already calculated &\n"\
160 " plot all data and specific blocks from depth 0 to 150, with 6 processors.\n"\
161 " soap.coverage -cvg -depthinput all.coverage -refsingle human.fa -il soap.list -p 6 -o result.txt -cdsinput cds.list -plot distribution.txt 0 150 -cdsplot distribution_cds.txt 0 150\n"\
163 " 5. Calculate physical coverage and duplication rate(read length=44) with\n"\
164 " insert between (avg-3SD, avg+SD)[avg=197, SD=9], with 8 processors\n"\
165 " soap.coverage -phy -il soap_without_single.list -refsingle human.fa -p 8 -o result.txt -duplicate 44 -insertlower 170 -insertupper 224\n"\
168 typedef unsigned short ushort
;
169 typedef unsigned int uint
;
170 typedef unsigned long ulong
;
171 typedef unordered_set
<string
> strsets
;
172 typedef unordered_map
<string
,int> strmaps
;
173 typedef unordered_map
<string
,pthread_mutex_t
*> semaphore_maps
;
174 typedef vector
<ushort
> vs
;
175 typedef vector
<uint
> vi
;
176 typedef vector
<pair
<string
, uint
> > vpsu
;
178 //Functions Declaration
179 void Intro(); //Program Head
180 void ParaDeal(int&, char**); //Dealing with parameters
181 void MakeMaps(); //Segments Shadowing form sets to maps
182 void BuildMemory(); //Alloc Memory
183 inline ulong
AddTotalBP(); //Use by BuildMemory(), cumulate quantity info in ref.
184 inline ulong
AddTotalBP_refsingle(); //Use by BuildMemory(), cumulate quantity info in refsingle.
185 void AddCover(); //-cvg
186 void Physical_AddCover(); //-phy
188 void OutputCoverage(); //-depthsingle
189 void OutputCoverageBin(); //-depthsinglebin
190 void OutputDistribution(); //-plot
191 void OutputWindow(); //-window
193 void CalcCDS(); //-cdsplot
194 void CalcPoint(); //-point
197 void PESupportOutput(); //-pesupport
198 void AddCvg(); //-depthinput
199 void AddCvgFromSamtools(); //-depthinputsam
200 void AddRefSingle(); //-refsingle
201 void AddFastat(); //-reffastat
202 void Output_list(); //Sub of result illustration
203 void ComfirmAllDigit(const char*); //Sub of ParaDeal()
204 inline uint
SeperateCigar(string
&, vector
<uint
> &); //Sub of _AddCover_Sub
207 bool cvg(false), phy(false), tag(false);
208 bool depth(false), depthsingle(false), nospace(false), depthsinglebin(false), ref(false), refsingle(false), reffastat(false),depthinput(false),depthinputsam(false),\
209 cdsinput(false), cdsplot(false), addon(false), addN(false), plot(false),cdsdetail(false), sam(false),\
210 window(false), pslsub(false), pslquery(false), maq(false), m8subject(false), m8query(false), axtoitg(false), axtoiq(false),\
211 nowarning(false), sp(false), il_single(false), il_soap(false), pesupport(false), onlyuniq(false),\
212 precise(false), gz(false), mummerquery(false), nocalc(false), onlycover(false), plain(false), qmap(false), general(false),\
214 map
<string
, ulong
> refsingle_marker
;
215 int trim(0), sprd_length(0), mummer_limit(0);
217 vector
<string
> iPos
, isPos
, ipPos
;
218 string oPos
, rPos
, dPos
, cPos
, cdsPos
, cdsPos_out
, cdsdetailPos
, pointPos
, pointOut
, nPos
, plotPos
, windowPos_out
, spIn
, spOut
;
219 uint
plot_upper(0), plot_lower(0), cds_plot_upper(0), cds_plot_lower(0);
221 ulong
totalBP(0),countBP(0);
222 uint
insert_Limit(15000);
223 uint
insert_LowerLimit(0);
224 uint
duplicate(0), useful_data(0), useless_data(0);
225 uint
precisionOffset(6);
226 ushort
minimumDepth(0);
227 ushort
maximumDepth(65535);
228 string temp
; //Quick inter-funtions messenger for BuildMemory().
229 ofstream OutData
; //Global output handler
232 semaphore_maps chr_sema
;
233 vector
<vs
> vector_vs
;
237 vector
<int> distribution(65536,0);
238 uint
intmax(numeric_limits
<int>::max());
239 uint
idCol(1), startCol(2), endCol(3);
241 //Multi-Thread global var
243 void* _AddCover_general(void*);
244 void* _AddCover_plain(void*);
245 void* _AddCover_sam(void*);
246 void* _AddCover_Sub(void*);
247 void* _AddCover_pslsub(void*);
248 void* _AddCover_pslquery(void*);
249 void* _AddCover_maq(void*);
250 void* _AddCover_m8subject(void*);
251 void* _AddCover_m8query(void*);
252 void* _AddCover_mummerquery(void*);
253 void* _AddCover_axtoitg(void*);
254 void* _AddCover_axtoiq(void*);
255 void* _AddCover_qmap(void*);
256 void* _Physical_AddCover_Sub(void*);
263 int main(int argc
, char ** argv
)
269 ParaDeal(argc
, argv
);
277 if(depthinput
) AddCvg();
278 if(depthinputsam
) AddCvgFromSamtools();
280 if(addon
&& cvg
) AddCover();
281 if(addon
&& phy
) Physical_AddCover();
283 if(sp
&& !pesupport
) SPOutput();
284 if(sp
&& pesupport
) PESupportOutput();
286 if(!nocalc
) CalcCoverage();
288 OutData
<<"Overall:"<<endl
289 <<"Total:"<<totalBP
<<endl
290 <<"Covered:"<<countBP
<<endl
291 <<"Percentage:"<<(double)countBP
/totalBP
*100<<endl
<<endl
;
294 OutData
<<"Duplication rate:"<<(double)useless_data
/(useful_data
+useless_data
)*100<<'%'<<endl
;
296 if(plot
) OutputDistribution();
298 if(window
) OutputWindow();
300 if(depth
|| depthsingle
) OutputCoverage();
302 if(depthsinglebin
) OutputCoverageBin();
304 if(cdsinput
) CalcCDS();
306 if(!pointPos
.empty() && !pointOut
.empty()) CalcPoint();
308 cout
<<"Overall:"<<endl
309 <<"Total:"<<totalBP
<<endl
310 <<"Covered:"<<countBP
<<endl
311 <<"Percentage:"<<(double)countBP
/totalBP
*100<<endl
;
314 cout
<<"Duplication rate:"<<(double)useless_data
/(useful_data
+useless_data
)*100<<'%'<<endl
;
318 cout
<<(elapsedtime
.elapsed())<<"s Elapsed!"<<endl
;
326 cerr
<<PROGRAM_NAME
<<endl
;
327 cerr
<<" Version: "<<VERSION_STRING
<<endl
;
328 cerr
<<"Complied at: "<<PROGRAM_COMPILE_DATE
<<' '<<PROGRAM_COMPILE_TIME
<<endl
;
329 cerr
<<" Author: RuiBang Luo"<<endl
;
330 cerr
<<" E-mail: luoruibang@genomics.org.cn"<<endl
<<endl
;
336 if(window_size
< WINDOW_SIZE_LOWER_LIMIT
)
338 cerr
<<"User defined window length is lower than the lowest threshold "<<WINDOW_SIZE_LOWER_LIMIT
<<endl
339 <<"Window length will set to "<<WINDOW_SIZE_LOWER_LIMIT
<<endl
;
340 window_size
= WINDOW_SIZE_LOWER_LIMIT
;
342 progress_display
ptcount(chrmaps
.size(), cout
, "Calculating Coverage in window...\n");
343 ofstream
O(windowPos_out
.c_str());
346 cerr
<<"Error opening window output file: "<<windowPos_out
<<endl
;
349 strmaps::iterator mpos
;
350 for(mpos
= chrmaps
.begin(); mpos
!= chrmaps
.end(); ++mpos
)
353 for(register int i(1),limit(vector_vs
[mpos
->second
].size()); i
<limit
;)
355 register int BPTotal(0);
356 register int BPCount(0);
357 register int j( (i
+window_size
) < limit
? i
+window_size
: limit
);
358 if(j
- i
< window_size
* 0.5)
362 if(vector_vs
[mpos
->second
][i
]==65535)
364 if(vector_vs
[mpos
->second
][i
] < minimumDepth
)
366 if(vector_vs
[mpos
->second
][i
] > maximumDepth
)
368 BPTotal
+= vector_vs
[mpos
->second
][i
];
371 O
<<mpos
->first
<<'\t'<<i
-window_size
<<'\t'<<j
-1<<'\t'<<BPCount
<<'\t'<<(double)BPTotal
/ BPCount
<<endl
;
376 void Physical_AddCover()
378 cout
<<"Summarizing Physical Coverage..."<<endl
;
379 CThreadManager
threadMgr(NUM_THREADS
);
380 threadMgr
.SetTimer(0.5);
381 vector
<param_struct
> vparam(iPos
.size());
382 for(register uint
i(0); i
<iPos
.size(); ++i
)
384 vparam
[i
].pos
= iPos
[i
];
385 threadMgr
.AddThread( _Physical_AddCover_Sub
, (void*)&vparam
[i
]);
390 void* _Physical_AddCover_Sub(void* param
)
392 param_struct
* pParam
= (param_struct
*) param
;
393 static ushort
progress(1);
394 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
;
395 if((pParam
->pos
).find(".single") != string::npos
)
397 cout
<<" Omitted!"<<endl
;
398 return (void *) NULL
;
403 string ctemp
, ctemp1
, ctemp2
, ctemp3
, ctemp4
, ctemp5
, ctemp6
, ctemp_pos
;
404 strmaps::iterator mpos
;
405 semaphore_maps::iterator semapos
;
407 InData_Sub
.open(pParam
->pos
.c_str());
408 //Var for duplication
409 multimap
<uint
,uint
> dup_atom
;
410 vector
<multimap
<uint
,uint
> > dup_vector(chrmaps
.size());
411 //multimap<uint,uint>::iterator p_dup_atom;
412 pair
<multimap
<uint
,uint
>::iterator
, multimap
<uint
,uint
>::iterator
> p_dup_atom_pair
;
413 //***************************************
416 register uint
end(0),start(0),read_length_neg(0),read_length_pos(0), start_dup(0), end_dup(0);
417 strmaps::iterator mpos
;
421 bool continue_mark(false);
423 InData_Sub
>>ctemp
;InData_Sub
>>ctemp
;InData_Sub
>>ctemp
;InData_Sub
>>ctemp
;InData_Sub
>>ctemp
;InData_Sub
>>ctemp5
;
425 InData_Sub
>>ctemp_pos
;
429 InData_Sub
>>ctemp2
;InData_Sub
.ignore(100000,'\n');
431 InData_Sub
>>ctemp
;InData_Sub
>>ctemp
;InData_Sub
>>ctemp
;InData_Sub
>>ctemp
;InData_Sub
>>ctemp
;InData_Sub
>>ctemp6
;
433 InData_Sub
>>ctemp
;InData_Sub
>>ctemp3
;
435 InData_Sub
>>ctemp4
;InData_Sub
.ignore(100000,'\n');
440 getline(InData_Sub
, ctemp
, '\n');
441 cerr
<<"Unmatched chromosome context!"<<"\t"<<ctemp1
<<"\t"<<ctemp3
<<endl
;
444 if(atoi(ctemp2
.c_str()) > atoi(ctemp4
.c_str()))
446 swap(ctemp5
, ctemp6
);
447 swap(ctemp2
, ctemp4
);
449 read_length_neg
= atoi(ctemp6
.c_str());
450 read_length_pos
= atoi(ctemp5
.c_str());
451 if(read_length_neg
== 0 || read_length_pos
== 0 || ctemp2
.size() > 9 || ctemp4
.size() > 9)
453 cerr
<<"One row bypassed!"<<endl
;
456 start
= atoi(ctemp2
.c_str());
457 end
= atoi(ctemp4
.c_str()) + read_length_neg
- 1;
460 cerr
<<"end<=start"<<endl
;
463 if((end
-start
)>insert_Limit
|| (end
-start
)<insert_LowerLimit
) continue;
465 mpos
= chrmaps
.find(temp
); semapos
= chr_sema
.find(temp
);
466 if(mpos
== chrmaps
.end()) continue;
469 start_dup
= start
; end_dup
= end
;
470 if(read_length_neg
< duplicate
)
472 start_dup
= start_dup
- (duplicate
- read_length_neg
-1);
474 if(read_length_pos
< duplicate
)
476 end_dup
= atoi(ctemp4
.c_str()) + duplicate
- 2;
478 p_dup_atom_pair
= dup_vector
[mpos
->second
].equal_range(start_dup
);
479 if (p_dup_atom_pair
.first
== p_dup_atom_pair
.second
)
481 dup_vector
[mpos
->second
].insert(make_pair
<uint
, uint
>(start_dup
, end_dup
));
485 for(;p_dup_atom_pair
.first
!= p_dup_atom_pair
.second
; ++p_dup_atom_pair
.first
)
487 if(p_dup_atom_pair
.first
->second
== end_dup
)
490 continue_mark
= true;
494 if (!continue_mark
) dup_vector
[mpos
->second
].insert(make_pair
<uint
, uint
>(start_dup
, end_dup
));
497 if (continue_mark
) continue;
501 if(start
>= vector_vs
[mpos
->second
].size()) continue;
502 if(end
>= vector_vs
[mpos
->second
].size()) end
= vector_vs
[mpos
->second
].size() - 1;
506 for(register size_t i(start
); i
<= end
; ++i
)
508 if(sp_vvs
[mpos
->second
][i
] == 0)
510 if(sp_vvs
[mpos
->second
][i
] == cache
)
512 cache
= sp_vvs
[mpos
->second
][i
];
513 ++(sp_sui_soap
[sp_vvs
[mpos
->second
][i
]].second
);
514 sp_sui_soap
[sp_vvs
[mpos
->second
][i
]].first
+= ("\t" + lexical_cast
<string
>(end
- start
+ 1 - read_length_neg
- read_length_pos
));
518 if(!InData_Sub
) break;
519 if(start
>= vector_vs
[mpos
->second
].size()) continue;
520 if(end
>= vector_vs
[mpos
->second
].size()) end
= vector_vs
[mpos
->second
].size()-1;
521 for(register uint
j(start
),k(end
); j
<=k
; ++j
)
522 ++(vector_vs
[mpos
->second
][j
]);
526 void OutputDistribution()
528 ofstream
O(plotPos
.c_str());
531 cerr
<<"Error opening plot output file: "<<plotPos
<<endl
;
534 for(register int i(plot_lower
); i
<=plot_upper
; ++i
)
535 //if(distribution[i])
536 O
<<setw(5)<<i
<<setw(12)<<distribution
[i
]<<endl
;
539 void OutputCoverage()
541 progress_display
ptcount(chrmaps
.size(), cout
, "Output Coverage to files (Text)...\n");
543 strmaps::iterator mpos
;
544 string delim
= nospace
? "": " ";
547 for(mpos
= chrmaps
.begin(); mpos
!= chrmaps
.end(); ++mpos
)
550 temp
= dPos
+ '/'+ mpos
->first
+ ".coverage";
551 ofstream
depthData(temp
.c_str());
552 depthData
<<'>'<<mpos
->first
<<"\n";
555 length
= DEPTH_SINGLE_LENGTH_PER_ROW
;
559 for(register unsigned long i(1),limit(vector_vs
[mpos
->second
].size()); i
<limit
; ++i
)
561 if(vector_vs
[mpos
->second
][i
])
565 if (i
&& i
%length
== 0) depthData
<<'\n';
570 for(register unsigned long i(1),limit(vector_vs
[mpos
->second
].size()); i
<limit
; ++i
)
572 depthData
<<vector_vs
[mpos
->second
][i
]<<delim
;
573 if (i
&& i
%length
== 0) depthData
<<'\n';
580 length
= DEPTH_SINGLE_LENGTH_PER_ROW
;
582 ofstream
depthData(temp
.c_str());
583 for(mpos
= chrmaps
.begin(); mpos
!= chrmaps
.end(); ++mpos
)
586 depthData
<<'>'<<mpos
->first
<<"\n";
589 for(register unsigned long i(1),limit(vector_vs
[mpos
->second
].size()); i
<limit
; ++i
)
591 if(vector_vs
[mpos
->second
][i
])
595 if (i
&& i
%length
== 0) depthData
<<'\n';
600 for(register unsigned long i(1),limit(vector_vs
[mpos
->second
].size()); i
<limit
; ++i
)
602 depthData
<<vector_vs
[mpos
->second
][i
]<<delim
;
603 if (i
&& i
%length
== 0) depthData
<<'\n';
611 void OutputCoverageBin()
613 progress_display
ptcount(chrmaps
.size(), cout
, "Output Coverage to files (Binary)...\n");
614 strmaps::iterator mpos
;
615 ofstream
depthData(dPos
.c_str(), ios_base::binary
| ios_base::out
| ios_base::trunc
);
616 ulong
len_tmp(vector_vs
.size());
617 //Output the quantity of segments
618 depthData
.write((char*)(&len_tmp
), sizeof(ulong
));
621 for(mpos
= chrmaps
.begin(); mpos
!= chrmaps
.end(); ++mpos
)
624 //Output the length of the name
625 len_tmp
= mpos
->first
.size();
626 depthData
.write((char*)(&len_tmp
), sizeof(ulong
));
628 depthData
.write(mpos
->first
.c_str(), mpos
->first
.size());
629 //Output the length of the segment
630 len_tmp
= (vector_vs
[mpos
->second
].size()-1);
631 depthData
.write((char*)(&len_tmp
), sizeof(ulong
));
636 for(register unsigned long i(1),limit(vector_vs
[mpos
->second
].size()); i
<limit
; ++i
)
637 //Output the single point detail
639 if(vector_vs
[mpos
->second
][i
])
640 depthData
.write((char*)(&s1
), sizeof(ushort
));
642 depthData
.write((char*)(&s0
), sizeof(ushort
));
647 for(register unsigned long i(1),limit(vector_vs
[mpos
->second
].size()); i
<limit
; ++i
)
648 //Output the single point detail
649 depthData
.write((char*)(&vector_vs
[mpos
->second
][i
]), sizeof(ushort
));
656 progress_display
ptcount(chrmaps
.size(), cout
, "Calculating Coverage...\n");
657 //OutData<<"Coverage of each:\n";
658 strmaps::iterator mpos
;
659 for(mpos
= chrmaps
.begin(); mpos
!= chrmaps
.end(); ++mpos
)
661 OutData
<<mpos
->first
<<": ";
664 ulong
depth_of_seq(0);
665 for(register int i(1),limit(vector_vs
[mpos
->second
].size()); i
<limit
; ++i
)
667 if(i
<trim
&& i
>=(limit
-trim
))
669 if(vector_vs
[mpos
->second
][i
]==65535)
672 if(vector_vs
[mpos
->second
][i
] < minimumDepth
)
674 if(vector_vs
[mpos
->second
][i
] > maximumDepth
)
676 if(vector_vs
[mpos
->second
][i
])
678 depth_of_seq
+= vector_vs
[mpos
->second
][i
];
681 ++distribution
[vector_vs
[mpos
->second
][i
]];
684 OutData
<<singleBP
<<'/'<<effectiveBP
<<"\t"<<setprecision(precisionOffset
)<<((double)singleBP
/effectiveBP
*100)<<"\t"<<setprecision(precisionOffset
)<<depth_of_seq
/(double)singleBP
<<endl
;
692 cout
<<"Marking S/P ratio area..."<<endl
;
693 //cout<<"Int Max: "<<intmax<<endl;
694 strmaps::iterator mpos
;
695 ifstream
SPIN(spIn
.c_str());
698 cerr
<<"Error opening S/P ratio input file: "<<spIn
<<endl
;
703 if(!pesupport
) sp_sui_single
.push_back(make_pair
<string
, uint
>("", 0));
704 sp_sui_soap
.push_back(make_pair
<string
, uint
>("", 0));
707 assert(mark
== sp_sui_soap
.size());
709 uint
itemp2(0),itemp3(0);
710 SPIN
>>stemp1
>>itemp2
>>itemp3
;
712 mpos
= chrmaps
.find(stemp1
);
713 if(mpos
== chrmaps
.end())
715 cerr
<<stemp1
<<" from S/P ratio input not found in reference!"<<endl
;
718 if(!(mark
== sp_sui_single
.size() && sp_sui_single
.size() == sp_sui_soap
.size()) && !pesupport
)
720 cerr
<<"!(mark == sp_sui_single.size() && sp_sui_single.size() == sp_sui_soap.size() && !pesupport)"<<endl
;
723 if(!pesupport
) sp_sui_single
.push_back(make_pair
<string
, uint
>(stemp1
+ "\t" + lexical_cast
<string
>(itemp2
) + "\t" + lexical_cast
<string
>(itemp3
), 0));
724 sp_sui_soap
.push_back(make_pair
<string
, uint
>(stemp1
+ "\t" + lexical_cast
<string
>(itemp2
) + "\t" + lexical_cast
<string
>(itemp3
), 0));
725 if(itemp3
>= sp_vvs
[mpos
->second
].size())
726 itemp3
= sp_vvs
[mpos
->second
].size() - 1;
727 for(register uint
j(itemp2
); j
<=itemp3
; ++j
)
728 sp_vvs
[mpos
->second
][j
] = mark
;
731 cerr
<<"Size of S/P ratio segments: "<<sp_sui_soap
.size()<<endl
;
732 cerr
<<"Size of S/P ratio scaffolds: "<<sp_vvs
.size()<<endl
;
737 ofstream
O(spOut
.c_str());
740 cerr
<<"Error opening S/P ratio output file!"<<endl
;
743 for(register size_t i(1); i
< sp_sui_single
.size(); ++i
)
744 O
<<sp_sui_single
[i
].first
<<'\t'<<sp_sui_single
[i
].second
<<'\t'<<sp_sui_soap
[i
].second
<<'\t'<<(sp_sui_soap
[i
].second
?(double)sp_sui_single
[i
].second
/sp_sui_soap
[i
].second
:0)<<endl
;
747 void PESupportOutput()
749 ofstream
O(spOut
.c_str());
752 cerr
<<"Error opening SPE support output file!"<<endl
;
755 for(register size_t i(1); i
< sp_sui_soap
.size(); ++i
)
756 O
<<sp_sui_soap
[i
].second
<<'\t'<<sp_sui_soap
[i
].first
<<endl
;
761 cout
<<"Summarizing CDS Coverage..."<<endl
;
762 strmaps::iterator mpos
;
763 ifstream
CDS(cdsPos
.c_str());
766 cerr
<<"Error opening cds input file: "<<cdsPos
<<endl
;
772 CDSO
.open(cdsPos_out
.c_str());
775 cerr
<<"Error opening cds output file: "<<cdsPos_out
<<endl
;
781 CDS_DETAIL
.open(cdsdetailPos
.c_str());
782 if(cdsdetail
&& !CDS_DETAIL
)
784 cerr
<<"Error opening cds details output file: "<<cdsdetailPos
<<endl
;
788 vector
<int> cds_distribution(65536,0);
791 string stemp1
,stemp2
,stemp3
,name
;
794 CDS
>>stemp1
>>stemp2
>>stemp3
;
796 name
= stemp1
+ "\t" + stemp2
+ "\t" + stemp3
;
797 int itemp2(0),itemp3(0);
798 itemp2
= atoi(stemp2
.c_str());
799 itemp3
= atoi(stemp3
.c_str());
800 mpos
= chrmaps
.find(stemp1
);
801 if(mpos
== chrmaps
.end()) continue;
805 int cvs_covered_sub(0);
806 for(register uint
j(itemp2
); j
<=itemp3
; ++j
)
808 if(j
< 1 || j
>= vector_vs
[mpos
->second
].size())
810 cerr
<<stemp1
<<'\t'<<stemp2
<<'\t'<<stemp3
<<'\t'<<" Overflow! Omitted."<<endl
;
813 if(vector_vs
[mpos
->second
][j
] != 65535 && vector_vs
[mpos
->second
][j
] > minimumDepth
&& vector_vs
[mpos
->second
][j
] < maximumDepth
)
815 cvg_total
+=vector_vs
[mpos
->second
][j
];
817 if(vector_vs
[mpos
->second
][j
])
822 //vector_vs[mpos->second][j] = 65535;
824 if(cdsdetail
) ss
<<vector_vs
[mpos
->second
][j
]<<' ';
825 ++cds_distribution
[vector_vs
[mpos
->second
][j
]];
829 CDS_DETAIL
<<">"<<name
<<'\t'<<itemp3
-itemp2
+1<<'\t'<<cvs_covered_sub
<<'\t'<<setprecision(6)<<((double)cvg_total
/cvs_covered_sub
)<<endl
;
830 CDS_DETAIL
<<ss
.str()<<endl
;
834 cout
<<"Coverage of specific regions: "<<(double)cds_covered
/cds_total
*100<<"%"<<endl
;
835 OutData
<<"Coverage of specific regions: "<<(double)cds_covered
/cds_total
*100<<"%"<<endl
;
837 for(register int i(cds_plot_lower
) ; i
<=cds_plot_upper
; ++i
)
838 if(cds_distribution
[i
])
839 CDSO
<<setw(5)<<i
<<setw(12)<<cds_distribution
[i
]<<endl
;
844 cout
<<"Summarizing Point Coverage..."<<endl
;
845 strmaps::iterator mpos
;
846 ifstream
CDS(pointPos
.c_str());
849 cerr
<<"Error opening point input file: "<<pointPos
<<endl
;
852 ofstream
CDSO(pointOut
.c_str());
855 cerr
<<"Error opening point output file: "<<pointOut
<<endl
;
860 string stemp1
,stemp2
,name
;
866 j
= atoi(stemp2
.c_str());
867 mpos
= chrmaps
.find(stemp1
);
868 if(mpos
== chrmaps
.end() || j
< 1 || j
>= vector_vs
[mpos
->second
].size())
870 CDSO
<< stemp1
<< "\t" << stemp2
<< "\t-1\n";
873 CDSO
<< stemp1
<< "\t" << stemp2
<< "\t" << vector_vs
[mpos
->second
][j
] << "\n";
880 strmaps::iterator mpos
;
881 ifstream
CVG(cPos
.c_str());
884 cerr
<<"Error opening Coverage file: "<<cPos
<<endl
;
887 //Judge what kind of file (text or binary)
889 CVG
.read(&firstChar
, 1);
893 progress_display
ptcount(chrmaps
.size(), cout
, "Importing Coverage (Text)...\n");
895 for(register unsigned long i(1);;++i
)
899 if(cdstemp
[0] == '>')
903 cdstemp
= cdstemp
.substr(1, string::npos
);
904 mpos
= chrmaps
.find(cdstemp
);
905 if(mpos
== chrmaps
.end())
907 cerr
<<"Sequence "<<cdstemp
<<" missing!"<<endl
;
908 cerr
<<"Wrong input depth file or reference file?"<<endl
;
914 vector_vs
[mpos
->second
][i
] = atoi(cdstemp
.c_str());
921 CVG
.open(cPos
.c_str(), ios_base::in
| ios_base::binary
);
922 ulong
chr_quantity(0);
924 CVG
.read((char*)(&chr_quantity
), sizeof(ulong
));
925 if(chr_quantity
!= chrmaps
.size())
927 cerr
<<"Segment quantity in reference file is different from depthinput file!"<<endl
928 <<" Reference: "<<chrmaps
.size()<<endl
929 <<"Depthinput: "<<chr_quantity
<<endl
;
932 progress_display
ptcount(chrmaps
.size(), cout
, "Importing Coverage (Binary)...\n");
934 for(int x(0); x
< chr_quantity
; ++x
,++ptcount
)
936 CVG
.read((char*)(&len_tmp
), sizeof(ulong
));
937 char* name_tmp
= new char[len_tmp
+1];
938 name_tmp
[len_tmp
] = '\0';
939 CVG
.read(name_tmp
, len_tmp
);
942 mpos
= chrmaps
.find(cdstemp
);
943 if(mpos
== chrmaps
.end())
945 cerr
<<"Sequence "<<cdstemp
<<" missing!"<<endl
;
946 cerr
<<"Wrong input depth file or reference file?"<<endl
;
949 CVG
.read((char*)(&len_tmp
), sizeof(ulong
));
951 for(register int i(1); i
< len_tmp
; ++i
)
952 CVG
.read((char*)(&vector_vs
[mpos
->second
][i
]), sizeof(ushort
));
957 void AddCvgFromSamtools()
959 strmaps::iterator mpos
;
960 igzstream
CVG(cPos
.c_str());
963 cerr
<<"Error opening Samtools Coverage file: "<<cPos
<<endl
;
966 //Judge what kind of file (text or binary)
967 progress_display
ptcount(chrmaps
.size(), cout
, "Importing Coverage (Samtools)...\n");
969 string chr
; size_t pos
, depth
;
972 CVG
>> chr
>> pos
>> depth
;
978 mpos
= chrmaps
.find(chr
);
979 if(mpos
== chrmaps
.end())
981 cerr
<<"Sequence "<<chr
<<" missing!"<<endl
;
982 cerr
<<"Wrong input depth file or reference file?"<<endl
;
988 vector_vs
[mpos
->second
][pos
] = depth
;
994 ifstream
N(nPos
.c_str());
997 cerr
<<"Error opening Gap file: "<<nPos
<<endl
;
1000 cerr
<<"Masking N gaps..."<<endl
;
1001 string stemp1
; int itemp2(0), itemp3(0);
1002 strmaps::iterator mpos
;
1005 N
>>stemp1
>>itemp2
>>itemp3
;
1007 mpos
= chrmaps
.find(stemp1
);
1008 if(mpos
== chrmaps
.end())
1010 cerr
<<"Error founding "<<stemp1
<<endl
;
1013 for(register uint
j(itemp2
); j
< itemp3
; ++j
)
1016 vector_vs
[mpos
->second
][j
]=65535;
1023 cout
<<"Summarizing Coverage..."<<endl
;
1024 CThreadManager
threadMgr(NUM_THREADS
);
1025 threadMgr
.SetTimer(0.5);
1026 vector
<param_struct
> vparam(iPos
.size());
1027 for(register uint
i(0); i
<iPos
.size(); ++i
)
1029 vparam
[i
].pos
= iPos
[i
];
1031 threadMgr
.AddThread( _AddCover_plain
, (void*)&vparam
[i
]);
1033 threadMgr
.AddThread( _AddCover_general
, (void*)&vparam
[i
]);
1035 threadMgr
.AddThread( _AddCover_sam
, (void*)&vparam
[i
]);
1037 threadMgr
.AddThread( _AddCover_pslsub
, (void*)&vparam
[i
]);
1039 threadMgr
.AddThread( _AddCover_pslquery
, (void*)&vparam
[i
]);
1041 threadMgr
.AddThread( _AddCover_maq
, (void*)&vparam
[i
]);
1043 threadMgr
.AddThread( _AddCover_m8subject
, (void*)&vparam
[i
]);
1045 threadMgr
.AddThread( _AddCover_m8query
, (void*)&vparam
[i
]);
1046 else if(mummerquery
)
1047 threadMgr
.AddThread( _AddCover_mummerquery
, (void*)&vparam
[i
]);
1049 threadMgr
.AddThread( _AddCover_axtoitg
, (void*)&vparam
[i
]);
1051 threadMgr
.AddThread( _AddCover_axtoiq
, (void*)&vparam
[i
]);
1053 threadMgr
.AddThread( _AddCover_qmap
, (void*)&vparam
[i
]);
1055 threadMgr
.AddThread( _AddCover_Sub
, (void*)&vparam
[i
]);
1060 void* _AddCover_plain(void* param
)
1062 string ctemp
, ctemp2
;
1064 strmaps::iterator mpos
;
1065 semaphore_maps::iterator semapos
;
1066 param_struct
* pParam
= (param_struct
*) param
;
1067 static ushort
progress(1);
1068 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
<<endl
;
1069 imethod
InData(pParam
->pos
.c_str());
1072 unsigned long end(0),start(0);
1075 InData
>>temp
>>ctemp2
>>ctemp
;
1076 mpos
= chrmaps
.find(temp
); semapos
= chr_sema
.find(temp
);
1077 if(mpos
== chrmaps
.end())
1079 start
= atol(ctemp2
.c_str());
1080 end
= atol(ctemp
.c_str());
1088 pthread_mutex_lock(semapos
->second
);
1089 if(end
> vector_vs
[mpos
->second
].size())
1090 end
= vector_vs
[mpos
->second
].size();
1091 for(; start
< end
; ++start
)
1093 if(vector_vs
[mpos
->second
][start
]<65534)
1094 ++(vector_vs
[mpos
->second
][start
]);
1096 pthread_mutex_unlock(semapos
->second
);
1100 void* _AddCover_general(void* param
)
1102 uint
rname(idCol
- 1), pos(startCol
- 1), seq(endCol
- 1);
1103 uint maxCol
= max(rname
, pos
);
1104 maxCol
= max(maxCol
, seq
);
1106 vector
<string
> vec_str
;
1107 strmaps::iterator mpos
;
1108 semaphore_maps::iterator semapos
;
1109 param_struct
* pParam
= (param_struct
*) param
;
1110 static ushort
progress(1);
1111 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
<<endl
;
1112 imethod
InData(pParam
->pos
.c_str());
1115 uint
end(0),start(0);
1118 getline(InData
, str
);
1121 if(!str
.empty() && str
[0]=='#')
1124 regex_split(back_inserter(vec_str
), str
);
1125 if(vec_str
.size() <= maxCol
)
1127 mpos
= chrmaps
.find(vec_str
[rname
]); semapos
= chr_sema
.find(vec_str
[rname
]);
1128 if(mpos
== chrmaps
.end())
1130 start
= atoi(vec_str
[pos
].c_str());
1131 (generalLen
) ? (end
= start
+ atoi(vec_str
[seq
].c_str())) : (end
= atoi(vec_str
[seq
].c_str()));
1137 pthread_mutex_lock(semapos
->second
);
1138 if(end
> vector_vs
[mpos
->second
].size())
1139 end
= vector_vs
[mpos
->second
].size();
1140 for(; start
< end
; ++start
)
1142 if(vector_vs
[mpos
->second
][start
]<65534)
1143 ++(vector_vs
[mpos
->second
][start
]);
1145 pthread_mutex_unlock(semapos
->second
);
1150 #define SINGLE_UNMAPED 0x0004
1151 #define PAIR_UNMAPED 0x0008
1153 void* _AddCover_sam(void* param
)
1155 enum str_col
{qname
=0, flag
, rname
, pos
, mapq
, cigar
, mrnm
, mpos2
, isize
, seq
, qual
, tagmark
, vtype
};
1157 vector
<string
> vec_str
;
1158 strmaps::iterator mpos
;
1159 semaphore_maps::iterator semapos
;
1160 param_struct
* pParam
= (param_struct
*) param
;
1161 static ushort
progress(1);
1162 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
<<endl
;
1163 imethod
InData(pParam
->pos
.c_str());
1166 uint
end(0),start(0);
1169 getline(InData
, str
);
1172 if(!str
.empty() && str
[0]=='@')
1175 regex_split(back_inserter(vec_str
), str
);
1176 mpos
= chrmaps
.find(vec_str
[rname
]); semapos
= chr_sema
.find(vec_str
[rname
]);
1177 if(atoi(vec_str
[flag
].c_str()) & (SINGLE_UNMAPED
))//| PAIR_UNMAPED))
1179 if(mpos
== chrmaps
.end())
1181 start
= atoi(vec_str
[pos
].c_str());
1182 end
= start
+ vec_str
[seq
].size() - 1;
1188 pthread_mutex_lock(semapos
->second
);
1189 if(end
> vector_vs
[mpos
->second
].size())
1190 end
= vector_vs
[mpos
->second
].size();
1191 for(; start
< end
; ++start
)
1193 if(vector_vs
[mpos
->second
][start
]<65534)
1194 ++(vector_vs
[mpos
->second
][start
]);
1196 pthread_mutex_unlock(semapos
->second
);
1200 void* _AddCover_Sub(void* param
)
1202 string ctemp
, ctemp2
, uniqmark
, mismatchmark
;
1204 strmaps::iterator mpos
;
1205 semaphore_maps::iterator semapos
;
1206 param_struct
* pParam
= (param_struct
*) param
;
1207 static ushort
progress(1);
1208 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
<<endl
;
1210 imethod
InData(pParam
->pos
.c_str());
1212 vpsu
* sp_sui
= &sp_sui_single
;
1215 for(register size_t i(0); i
< isPos
.size(); ++i
)
1216 if(isPos
[i
] == pParam
->pos
)
1217 sp_sui
= &sp_sui_single
;
1218 for(register size_t i(0); i
< ipPos
.size(); ++i
)
1219 if(ipPos
[i
] == pParam
->pos
)
1220 sp_sui
= &sp_sui_soap
;
1222 cerr
<<(pParam
->pos
)<<" not found in both il_single and il_soap!"<<endl
;
1225 vector
<uint
> mismatch_bypass_vector
;
1228 uint
length(0),start(0), mismatch(0);
1232 InData
>>ctemp
;InData
>>ctemp
;InData
>>ctemp
;InData
>>uniqmark
;
1234 if(onlyuniq
&& uniqmark
!="1")
1236 InData
.ignore(100000,'\n');
1240 InData
>>ctemp
;InData
>>ctemp2
;
1242 InData
>>ctemp
; InData
>>temp
;
1243 mpos
= chrmaps
.find(temp
); semapos
= chr_sema
.find(temp
);
1244 if(mpos
== chrmaps
.end())
1246 InData
.ignore(100000,'\n');
1248 cerr
<<"Omitted one row cuz \""<<temp
<<"\" not found!"<<endl
;
1253 if(ctemp
.size() > 9)
1255 InData
.ignore(100000,'\n');
1257 cerr
<<"Omitted one row cuz position\""<<ctemp
<<"\" make no sense!"<<endl
;
1258 continue; //Validation
1260 //10 for mismatchmark
1261 InData
>>mismatchmark
;
1262 start
= atoi(ctemp
.c_str());
1263 length
= atoi(ctemp2
.c_str());
1264 //bool continue_match(false);
1267 mismatch
= atoi(mismatchmark
.c_str());
1268 mismatch_bypass_vector
.clear();
1269 if((mismatch
== 1) || (mismatch
== 2))
1272 getline(InData
, mismatchpos
, '\n');
1273 vector
<string
> vec_mismatchpos
;
1274 regex_split(back_inserter(vec_mismatchpos
), mismatchpos
);
1275 //mismatchpos = vec_mismatchpos[vec_mismatchpos.size()-1];
1277 mismatch
= SeperateCigar(vec_mismatchpos
[vec_mismatchpos
.size()-1], mismatch_bypass_vector
);
1281 cerr
<<"Regex match error on string: "<<endl
1282 <<temp
<<'\t'<<start
<<'\t'<<length
<<'\t'<<mismatchpos
<<endl
;
1283 mismatch_bypass_vector
.clear();
1288 InData
.ignore(100000,'\n');
1291 InData
.ignore(100000,'\n');
1298 cerr
<<"Omitted one row cuz length equals to 0!"<<endl
;
1299 continue; //Validation
1303 pthread_mutex_lock(semapos
->second
);
1305 uint
j(start
+length
);
1306 if(j
> vector_vs
[mpos
->second
].size())
1307 j
= vector_vs
[mpos
->second
].size();
1308 for(uint
loopcount(0); start
<j
; ++start
,++loopcount
)
1311 for(uint
checkmismatch_loopcount(0); checkmismatch_loopcount
< mismatch_bypass_vector
.size(); ++checkmismatch_loopcount
)
1312 if(mismatch_bypass_vector
[checkmismatch_loopcount
] == loopcount
)
1314 if(vector_vs
[mpos
->second
][start
]<65534)
1315 ++(vector_vs
[mpos
->second
][start
]);
1320 uint
end(start
+length
-1);
1321 if(end
>= sp_vvs
[mpos
->second
].size())
1322 end
= sp_vvs
[mpos
->second
].size() - 1;
1323 //if(sp_vvs[mpos->second][start] >= (*sp_sui).size() || sp_vvs[mpos->second][end] >= (*sp_sui).size())
1325 //cerr<<sp_vvs[mpos->second][start]<<'\t'<<sp_vvs[mpos->second][end]<<'\t'<<((*sp_sui).size())<<endl;
1328 if(sp_vvs
[mpos
->second
][start
] == 0 && sp_vvs
[mpos
->second
][end
] == 0)
1330 else if(sp_vvs
[mpos
->second
][start
] != 0 && sp_vvs
[mpos
->second
][end
] == 0)
1331 ++((*sp_sui
)[sp_vvs
[mpos
->second
][start
]]).second
;
1332 else if(sp_vvs
[mpos
->second
][start
] == 0 && sp_vvs
[mpos
->second
][end
] != 0)
1333 ++((*sp_sui
)[sp_vvs
[mpos
->second
][end
]]).second
;
1334 else if(sp_vvs
[mpos
->second
][start
] == sp_vvs
[mpos
->second
][end
] && sp_vvs
[mpos
->second
][start
] != 0)
1335 ++((*sp_sui
)[sp_vvs
[mpos
->second
][start
]]).second
;
1336 else if(sp_vvs
[mpos
->second
][start
] != sp_vvs
[mpos
->second
][end
] && sp_vvs
[mpos
->second
][start
] != 0 && sp_vvs
[mpos
->second
][end
] != 0)
1338 ++((*sp_sui
)[sp_vvs
[mpos
->second
][start
]]).second
;
1339 ++((*sp_sui
)[sp_vvs
[mpos
->second
][end
]]).second
;
1343 cerr
<<sp_vvs
[mpos
->second
][start
]<<'\t'<<sp_vvs
[mpos
->second
][end
]<<" Exceptional S/P ratio condition!"<<endl
;
1346 pthread_mutex_unlock(semapos
->second
);
1350 void* _AddCover_pslsub(void* param
)
1352 string ctemp
, ctemp2
;
1353 string length
, substart
;
1354 vector
<string
> length_vec
, substart_vec
;
1357 strmaps::iterator mpos
;
1358 semaphore_maps::iterator semapos
;
1359 param_struct
* pParam
= (param_struct
*) param
;
1360 static ushort
progress(1);
1361 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
<<endl
;
1362 imethod
InData(pParam
->pos
.c_str());
1365 uint
end(0),start(0);
1369 InData
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
;
1372 mpos
= chrmaps
.find(temp
); semapos
= chr_sema
.find(temp
);
1373 if(mpos
== chrmaps
.end())
1375 InData
.ignore(100000,'\n');
1378 InData
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>length
;
1379 InData
>>ctemp
>>substart
;
1381 substart_vec
.clear();
1382 regex_split(back_inserter(length_vec
), length
, e
);
1383 regex_split(back_inserter(substart_vec
), substart
, e
);
1387 pthread_mutex_lock(semapos
->second
);
1388 for(uint
vec_it(0); vec_it
< length_vec
.size(); ++vec_it
)
1390 start
= atoi(substart_vec
[vec_it
].c_str());
1391 end
= start
+ atoi(length_vec
[vec_it
].c_str());
1392 if(end
>= vector_vs
[mpos
->second
].size())
1393 end
= vector_vs
[mpos
->second
].size();
1394 for(; start
< end
; ++start
)
1396 if(vector_vs
[mpos
->second
][start
]<65534)
1397 ++(vector_vs
[mpos
->second
][start
]);
1400 pthread_mutex_unlock(semapos
->second
);
1404 void* _AddCover_pslquery(void* param
)
1406 string ctemp
, ctemp2
;
1407 string length
, substart
;
1408 vector
<string
> length_vec
, substart_vec
;
1411 strmaps::iterator mpos
;
1412 semaphore_maps::iterator semapos
;
1413 param_struct
* pParam
= (param_struct
*) param
;
1414 static ushort
progress(1);
1415 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
<<endl
;
1416 imethod
InData(pParam
->pos
.c_str());
1419 uint
end(0),start(0);
1423 InData
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
;
1426 mpos
= chrmaps
.find(temp
); semapos
= chr_sema
.find(temp
);
1427 if(mpos
== chrmaps
.end())
1429 InData
.ignore(100000,'\n');
1432 InData
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>length
;
1433 InData
>>substart
>>ctemp
;
1435 substart_vec
.clear();
1436 regex_split(back_inserter(length_vec
), length
, e
);
1437 regex_split(back_inserter(substart_vec
), substart
, e
);
1441 pthread_mutex_lock(semapos
->second
);
1442 for(uint
vec_it(0); vec_it
< length_vec
.size(); ++vec_it
)
1444 start
= atoi(substart_vec
[vec_it
].c_str());
1445 end
= start
+ atoi(length_vec
[vec_it
].c_str());
1446 if(end
>= vector_vs
[mpos
->second
].size())
1447 end
= vector_vs
[mpos
->second
].size();
1448 for(; start
< end
; ++start
)
1450 if(vector_vs
[mpos
->second
][start
]<65534)
1451 ++(vector_vs
[mpos
->second
][start
]);
1454 pthread_mutex_unlock(semapos
->second
);
1458 void* _AddCover_maq(void* param
)
1460 string ctemp
, ctemp2
;
1462 strmaps::iterator mpos
;
1463 semaphore_maps::iterator semapos
;
1464 param_struct
* pParam
= (param_struct
*) param
;
1465 static ushort
progress(1);
1466 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
<<endl
;
1467 imethod
InData(pParam
->pos
.c_str());
1470 uint
end(0),start(0);
1477 mpos
= chrmaps
.find(temp
); semapos
= chr_sema
.find(temp
);
1478 if(mpos
== chrmaps
.end())
1480 InData
.ignore(100000,'\n');
1486 InData
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
;
1487 start
= atoi(ctemp2
.c_str());
1488 end
= atoi(ctemp
.c_str()) + start
- 1;
1489 InData
.ignore(100000,'\n');
1495 pthread_mutex_lock(semapos
->second
);
1496 for(register uint
limit(vector_vs
[mpos
->second
].size()); start
<= end
; ++start
)
1498 if(start
>= limit
) break;
1499 if(vector_vs
[mpos
->second
][start
]<65534)
1500 ++(vector_vs
[mpos
->second
][start
]);
1502 pthread_mutex_unlock(semapos
->second
);
1506 void* _AddCover_m8subject(void* param
)
1508 string ctemp
, ctemp2
;
1510 strmaps::iterator mpos
;
1511 semaphore_maps::iterator semapos
;
1512 param_struct
* pParam
= (param_struct
*) param
;
1513 static ushort
progress(1);
1514 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
<<endl
;
1515 imethod
InData(pParam
->pos
.c_str());
1518 uint
end(0),start(0);
1525 mpos
= chrmaps
.find(temp
); semapos
= chr_sema
.find(temp
);
1526 if(mpos
== chrmaps
.end())
1528 InData
.ignore(100000,'\n');
1532 InData
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp2
;
1535 start
= atoi(ctemp2
.c_str());
1536 end
= atoi(ctemp
.c_str());
1539 InData
.ignore(100000,'\n');
1543 pthread_mutex_lock(semapos
->second
);
1544 for(register uint
limit(vector_vs
[mpos
->second
].size()); start
<= end
; ++start
)
1546 if(start
>= limit
) break;
1547 if(vector_vs
[mpos
->second
][start
]<65534)
1548 ++(vector_vs
[mpos
->second
][start
]);
1550 pthread_mutex_unlock(semapos
->second
);
1554 void* _AddCover_m8query(void* param
)
1556 string ctemp
, ctemp2
;
1558 strmaps::iterator mpos
;
1559 semaphore_maps::iterator semapos
;
1560 param_struct
* pParam
= (param_struct
*) param
;
1561 static ushort
progress(1);
1562 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
<<endl
;
1563 imethod
InData(pParam
->pos
.c_str());
1566 uint
end(0),start(0);
1571 mpos
= chrmaps
.find(temp
); semapos
= chr_sema
.find(temp
);
1572 if(mpos
== chrmaps
.end())
1574 InData
.ignore(100000,'\n');
1578 InData
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp
>>ctemp2
;
1581 start
= atoi(ctemp2
.c_str());
1582 end
= atoi(ctemp
.c_str());
1585 InData
.ignore(100000,'\n');
1589 pthread_mutex_lock(semapos
->second
);
1590 for(register uint
limit(vector_vs
[mpos
->second
].size()); start
<= end
; ++start
)
1592 if(start
>= limit
) break;
1593 if(vector_vs
[mpos
->second
][start
]<65534)
1594 ++(vector_vs
[mpos
->second
][start
]);
1596 pthread_mutex_unlock(semapos
->second
);
1600 void* _AddCover_mummerquery(void* param
)
1602 string ctemp
, ctemp2
;
1604 strmaps::iterator mpos
;
1605 semaphore_maps::iterator semapos
;
1606 param_struct
* pParam
= (param_struct
*) param
;
1607 static ushort
progress(1);
1608 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
<<endl
;
1609 imethod
InData(pParam
->pos
.c_str());
1611 bool four_column(false);
1612 //mummer format judgement
1615 while(temp
.find(">") != string::npos
)
1616 getline(InData
, temp
);
1617 stringstream
ss(temp
);
1634 uint
end(0),start(0);
1635 bool reverse_mark(false);
1636 bool read_signal(true);
1640 //For special mark or ref or ref position
1647 if(!temp
.empty() && temp
[0] == '>')
1649 reverse_mark
= false;
1652 mpos
= chrmaps
.find(name
); semapos
= chr_sema
.find(name
);
1653 if(mpos
== chrmaps
.end())
1655 cerr
<<"Fatal error! Name "<<name
<<" in mummer result file missing in reference, input file bypassed!"<<endl
;
1656 return (void*)(NULL
);
1659 if(temp
.find("Reverse") != string::npos
)
1661 reverse_mark
= true;
1666 read_signal
= false;
1673 //For query position
1678 //For query position
1684 if(atoi(ctemp2
.c_str()) < mummer_limit
)
1686 /*if(atoi(ctemp2.c_str()) > 100000)
1688 cerr<<"Record "<<ctemp<<'\t'<<ctemp2<<" > 100000"<<"! Omited."<<endl;
1694 end
= vector_vs
[mpos
->second
].size() - atoi(ctemp
.c_str());
1695 start
= end
- atoi(ctemp2
.c_str());
1699 start
= atoi(ctemp
.c_str());
1700 end
= start
+ atoi(ctemp2
.c_str());
1709 pthread_mutex_lock(semapos
->second
);
1710 for(register uint
limit(vector_vs
[mpos
->second
].size()); start
< end
; ++start
)
1712 if(start
>= limit
) break;
1713 if(vector_vs
[mpos
->second
][start
]<65534)
1714 ++(vector_vs
[mpos
->second
][start
]);
1716 pthread_mutex_unlock(semapos
->second
);
1720 void* _AddCover_qmap(void* param
)
1722 string
temp(">"), strTemp
;
1723 strmaps::iterator mpos
;
1724 semaphore_maps::iterator semapos
;
1725 param_struct
* pParam
= (param_struct
*) param
;
1726 static ushort
progress(1);
1727 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
<<endl
;
1728 imethod
InData(pParam
->pos
.c_str());
1730 uint
met(0), uMet(0);
1736 getline(InData
, temp
, '\n');
1739 if(!temp
.empty() && temp
[0] == '>')
1742 pthread_mutex_unlock(semapos
->second
);
1745 string
name(temp
.substr(1, temp
.find_first_of(" \t") - 1));
1746 mpos
= chrmaps
.find(name
); semapos
= chr_sema
.find(name
);
1747 if(mpos
== chrmaps
.end())
1749 cerr
<<"Fatal error! Name "<<name
<<" in qmap missing in reference, input file bypassed!"<<endl
;
1750 return (void*)(NULL
);
1752 pthread_mutex_lock(semapos
->second
);
1757 stringstream
ss(temp
);
1758 ss
>>strTemp
>>met
>>uMet
;
1759 vector_vs
[mpos
->second
][position
] += (met
+ uMet
);
1764 void* _AddCover_axtoitg(void* param
)
1766 string ctemp
, ctemp2
;
1768 strmaps::iterator mpos
;
1769 semaphore_maps::iterator semapos
;
1770 param_struct
* pParam
= (param_struct
*) param
;
1771 static ushort
progress(1);
1772 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
<<endl
;
1773 imethod
InData(pParam
->pos
.c_str());
1776 uint
end(0),start(0);
1781 if(InData
&& (temp
.empty() || isalpha(temp
[0])))
1784 mpos
= chrmaps
.find(temp
); semapos
= chr_sema
.find(temp
);
1785 if(mpos
== chrmaps
.end())
1787 InData
.ignore(100000,'\n');
1794 start
= atoi(ctemp2
.c_str());
1795 end
= atoi(ctemp
.c_str());
1796 InData
.ignore(100000,'\n');
1800 pthread_mutex_lock(semapos
->second
);
1801 for(register uint
limit(vector_vs
[mpos
->second
].size()); start
<= end
; ++start
)
1803 if(start
>= limit
) break;
1804 if(vector_vs
[mpos
->second
][start
]<65534)
1805 ++(vector_vs
[mpos
->second
][start
]);
1807 pthread_mutex_unlock(semapos
->second
);
1811 void* _AddCover_axtoiq(void* param
)
1813 string ctemp
, ctemp2
;
1816 strmaps::iterator mpos
;
1817 semaphore_maps::iterator semapos
;
1818 param_struct
* pParam
= (param_struct
*) param
;
1819 static ushort
progress(1);
1820 cout
<<progress
++<<'/'<<iPos
.size()<<": "<<pParam
->pos
<<endl
;
1821 imethod
InData(pParam
->pos
.c_str());
1824 uint
end(0),start(0);
1829 if(InData
&& (temp
.empty() || isalpha(temp
[0])))
1831 InData
>>temp
>>temp
>>temp
>>temp
;
1832 mpos
= chrmaps
.find(temp
); semapos
= chr_sema
.find(temp
);
1833 if(mpos
== chrmaps
.end())
1835 InData
.ignore(100000,'\n');
1844 if(!strand
.empty() && strand
[0]=='-')
1846 start
= vector_vs
[mpos
->second
].size() - atoi(ctemp
.c_str());
1847 end
= vector_vs
[mpos
->second
].size() - atoi(ctemp2
.c_str());
1851 start
= atoi(ctemp2
.c_str());
1852 end
= atoi(ctemp
.c_str());
1854 InData
.ignore(100000,'\n');
1858 pthread_mutex_lock(semapos
->second
);
1859 for(register uint
limit(vector_vs
[mpos
->second
].size()); start
<= end
; ++start
)
1861 if(start
>= limit
) break;
1862 if(vector_vs
[mpos
->second
][start
]<65534)
1863 ++(vector_vs
[mpos
->second
][start
]);
1865 pthread_mutex_unlock(semapos
->second
);
1873 ifstream
refData(temp
.c_str());
1876 cerr
<<"Error while opening: "<<temp
<<endl
;
1879 getline(refData
,ctemp
); //Abandon the first information line
1880 while(refData
>>ctemp
) singleBP
+=ctemp
.size();
1881 totalBP
+= (singleBP
- trim
* 2);
1882 return (singleBP
+1);
1885 ulong
AddTotalBP_refsingle()
1888 refsingle_marker
[temp
];
1891 ulong
AddTotalBP_fastat()
1894 refsingle_marker
[temp
];
1899 progress_display
ptcount(chr
.size(), cout
, "Building Memory Blocks...\n");
1900 strsets::iterator pos
;
1901 for(pos
=chr
.begin(); pos
!=chr
.end(); ++pos
)
1908 singleBP
=AddTotalBP_refsingle();
1913 singleBP
=AddTotalBP_fastat();
1917 temp
= rPos
+ '/' + *pos
+ ".fa";
1918 singleBP
=AddTotalBP();
1921 vector_vs
.push_back(vs(singleBP
,0));
1923 sp_vvs
.push_back(vi(singleBP
, 0));
1929 cout
<<"Shadowing Map..."<<endl
;
1930 strsets::iterator pos
;
1931 for(pos
=chr
.begin(); pos
!=chr
.end(); ++pos
)
1934 chrmaps
.insert(make_pair(*pos
,i
));
1935 chr_sema
.insert(make_pair(*pos
, new pthread_mutex_t
));
1936 if(pthread_mutex_init(chr_sema
[*pos
], NULL
))
1938 cerr
<<"Mutex on "<<*pos
<<" initialization error!"<<endl
;
1943 cerr
<<"Mutex Lock created: "<<chr_sema
.size()<<endl
;
1946 void ParaDeal(int & argc
,char** argv
)
1955 cout
<<endl
<<"Parameters List:";
1957 for(register int i(1),next(0); i
<argc
; i
++)
1966 if (strcmp(argv
[i
],"-h") == 0 || strcmp(argv
[i
],"-help") == 0)
1971 if (strcmp(argv
[i
], "-precisionOffset") == 0 || strcmp(argv
[i
], "-po") == 0)
1973 ComfirmAllDigit(argv
[i
+1]);
1974 precisionOffset
= atoi(argv
[i
+1]);
1978 if (strcmp(argv
[i
], "-minimumDepth") == 0 || strcmp(argv
[i
], "-mind") == 0)
1980 ComfirmAllDigit(argv
[i
+1]);
1981 minimumDepth
= atoi(argv
[i
+1]);
1985 if (strcmp(argv
[i
], "-maximumDepth") == 0 || strcmp(argv
[i
], "-maxd") == 0)
1987 ComfirmAllDigit(argv
[i
+1]);
1988 maximumDepth
= atoi(argv
[i
+1]);
1992 if (strcmp(argv
[i
],"-nowarning") == 0 || strcmp(argv
[i
],"-nw") == 0)
1997 if (strcmp(argv
[i
],"-onlyuniq") == 0 || strcmp(argv
[i
],"-ou") == 0)
2002 if (strcmp(argv
[i
],"-precise") == 0)
2007 if (strcmp(argv
[i
],"-plain") == 0)
2012 if (strcmp(argv
[i
],"-sam") == 0)
2017 if (strcmp(argv
[i
],"-qmap") == 0)
2022 if (strcmp(argv
[i
],"-pslquery") == 0)
2027 if (strcmp(argv
[i
],"-pslsub") == 0)
2032 if (strcmp(argv
[i
],"-maq") == 0)
2037 if (strcmp(argv
[i
],"-m8subject") == 0)
2042 if (strcmp(argv
[i
],"-m8query") == 0)
2047 if (strcmp(argv
[i
],"-mummerquery") == 0)
2050 ComfirmAllDigit(argv
[i
+1]);
2051 mummer_limit
= atoi(argv
[i
+1]);
2052 if(mummer_limit
< 0) mummer_limit
= 20;
2056 if (strcmp(argv
[i
],"-axtoitg") == 0)
2061 if (strcmp(argv
[i
],"-axtoiq") == 0)
2066 if (strcmp(argv
[i
],"-cvg") == 0)
2070 cerr
<<"Parameter \"-cvg\" can't be used with \"-phy\""<<endl
;
2076 if (strcmp(argv
[i
],"-nospace") == 0)
2080 if (strcmp(argv
[i
],"-phy") == 0)
2084 cerr
<<"Parameter \"-phy\" can't be used with \"-cvg\""<<endl
;
2090 if (strcmp(argv
[i
],"-tag") == 0)
2094 cerr
<<"Parameter \"-tag\" can't be used with \"-phy\""<<endl
;
2101 if (strcmp(argv
[i
],"-nocalc") == 0 || strcmp(argv
[i
],"-nc") == 0)
2106 if (strcmp(argv
[i
],"-onlycover") == 0 || strcmp(argv
[i
],"-oc") == 0)
2111 if (strcmp(argv
[i
],"-p") == 0)
2113 ComfirmAllDigit(argv
[i
+1]);
2114 NUM_THREADS
= atoi(argv
[i
+1]);
2118 if (strcmp(argv
[i
],"-plot") == 0)
2123 cerr
<<"\n-plot parameter format error!\n";
2126 plotPos
= argv
[i
+1];
2127 ComfirmAllDigit(argv
[i
+2]);
2128 ComfirmAllDigit(argv
[i
+3]);
2129 plot_lower
= atoi(argv
[i
+2]);
2130 plot_upper
= atoi(argv
[i
+3]);
2131 if(plot_lower
< 0) plot_lower
= 0;
2132 if(plot_upper
> 65534) plot_upper
= 65534;
2136 if (strcmp(argv
[i
],"-general") == 0 || strcmp(argv
[i
],"-generallen") == 0)
2138 if(strcmp(argv
[i
],"-general") == 0)
2141 general
= generalLen
= true;
2144 cerr
<<"\n-general or -generalLen parameter format error!\n";
2147 ComfirmAllDigit(argv
[i
+1]);
2148 ComfirmAllDigit(argv
[i
+2]);
2149 ComfirmAllDigit(argv
[i
+3]);
2150 idCol
= atoi(argv
[i
+1]);
2151 startCol
= atoi(argv
[i
+2]);
2152 endCol
= atoi(argv
[i
+3]);
2156 if (strcmp(argv
[i
],"-duplicate") == 0)
2158 ComfirmAllDigit(argv
[i
+1]);
2159 duplicate
= atoi(argv
[i
+1]);
2163 if (strcmp(argv
[i
],"-insertupper") == 0 || strcmp(argv
[i
],"-iu") == 0)
2165 ComfirmAllDigit(argv
[i
+1]);
2166 insert_Limit
= atoi(argv
[i
+1]);
2170 if (strcmp(argv
[i
],"-insertlower") == 0 || strcmp(argv
[i
],"-ilo") == 0)
2172 ComfirmAllDigit(argv
[i
+1]);
2173 insert_LowerLimit
= atoi(argv
[i
+1]);
2177 if (strcmp(argv
[i
],"-addn") == 0)
2184 if (strcmp(argv
[i
],"-depthinput") == 0 || strcmp(argv
[i
],"-di") == 0)
2191 if (strcmp(argv
[i
],"-depthinputsam") == 0 || strcmp(argv
[i
],"-dis") == 0)
2198 if (strcmp(argv
[i
],"-cdsinput") == 0 || strcmp(argv
[i
],"-ci") == 0)
2205 if (strcmp(argv
[i
],"-point") == 0)
2207 pointPos
= argv
[i
+1];
2208 pointOut
= argv
[i
+2];
2212 if (strcmp(argv
[i
],"-sp") == 0)
2220 if (strcmp(argv
[i
],"-pesupport") == 0 || strcmp(argv
[i
],"-ps") == 0)
2229 if (strcmp(argv
[i
],"-cdsplot") == 0 || strcmp(argv
[i
],"-cp") == 0)
2232 cdsPos_out
= argv
[i
+1];
2233 ComfirmAllDigit(argv
[i
+2]);
2234 ComfirmAllDigit(argv
[i
+3]);
2235 cds_plot_lower
= atoi(argv
[i
+2]);
2236 cds_plot_upper
= atoi(argv
[i
+3]);
2237 if(cds_plot_lower
< 0) cds_plot_lower
= 0;
2238 if(cds_plot_upper
> 65534) cds_plot_upper
= 65534;
2242 if (strcmp(argv
[i
],"-cdsdetail") == 0 || strcmp(argv
[i
],"-cd") == 0)
2245 cdsdetailPos
= argv
[i
+1];
2249 if (strcmp(argv
[i
],"-window") == 0)
2252 windowPos_out
= argv
[i
+1];
2253 ComfirmAllDigit(argv
[i
+2]);
2254 window_size
= atoi(argv
[i
+2]);
2255 if(window_size
< 0) window_size
= 0;
2259 if (strcmp(argv
[i
],"-trim") == 0)
2261 ComfirmAllDigit(argv
[i
+1]);
2262 trim
= atoi(argv
[i
+1]);
2266 if (strcmp(argv
[i
],"-depth") == 0)
2273 if (strcmp(argv
[i
],"-depthsingle") == 0 || strcmp(argv
[i
],"-ds") == 0)
2280 if (strcmp(argv
[i
],"-depthsinglebin") == 0 || strcmp(argv
[i
],"-dsb") == 0)
2283 depthsinglebin
=true;
2287 if (strcmp(argv
[i
],"-o") == 0)
2293 if (strcmp(argv
[i
],"-i") == 0)
2296 for(register int j(1); (i
+j
)<argc
; ++j
)
2298 if (argv
[i
+j
][0] == '-') break;
2299 igzstream
InData_sub(argv
[i
+j
]);
2302 cerr
<<"Error opening Soap file:"<<argv
[i
+j
]<<endl
;
2305 iPos
.push_back(argv
[i
+j
]);
2310 if (strcmp(argv
[i
],"-il") == 0)
2313 igzstream
InData(argv
[i
+1]);
2316 cerr
<<"Error opening Soap list-file:"<<argv
[i
+1]<<endl
;
2319 for(InData
>>ctemp
; InData
; InData
>>ctemp
)
2321 iPos
.push_back(ctemp
);
2323 for(register uint
i(0); i
<iPos
.size(); ++i
)
2325 igzstream
InData_sub(iPos
[i
].c_str());
2328 cerr
<<"Error opening Soap file:"<<iPos
[i
]<<endl
;
2335 if (strcmp(argv
[i
],"-il_single") == 0)
2339 igzstream
InData(argv
[i
+1]);
2342 cerr
<<"Error opening Soap single list-file:"<<argv
[i
+1]<<endl
;
2345 for(InData
>>ctemp
; InData
; InData
>>ctemp
)
2347 iPos
.push_back(ctemp
);
2348 isPos
.push_back(ctemp
);
2350 for(register uint
i(0); i
<isPos
.size(); ++i
)
2352 igzstream
InData_sub(isPos
[i
].c_str());
2355 cerr
<<"Error opening Soap single file:"<<isPos
[i
]<<endl
;
2362 if (strcmp(argv
[i
],"-il_soap") == 0)
2366 igzstream
InData(argv
[i
+1]);
2369 cerr
<<"Error opening Soap PE list-file:"<<argv
[i
+1]<<endl
;
2372 for(InData
>>ctemp
; InData
; InData
>>ctemp
)
2374 iPos
.push_back(ctemp
);
2375 ipPos
.push_back(ctemp
);
2377 for(register uint
i(0); i
<ipPos
.size(); ++i
)
2379 igzstream
InData_sub(ipPos
[i
].c_str());
2382 cerr
<<"Error opening Soap PE file:"<<ipPos
[i
]<<endl
;
2389 if (strcmp(argv
[i
],"-ref") == 0)
2392 igzstream
InData(argv
[i
+1]);
2395 cerr
<<"Error opening Reference list file: "<<argv
[i
+1]<<endl
;
2398 for(InData
>>temp
; InData
; InData
>>temp
)
2400 rPos
= temp
.substr(0,temp
.rfind('/'));
2401 temp
=temp
.substr((temp
.rfind('/')+1), string::npos
);
2402 temp
=temp
.substr(0, temp
.find(".fa"));
2408 if (strcmp(argv
[i
],"-refsingle") == 0 || strcmp(argv
[i
],"-rs") == 0)
2415 if (strcmp(argv
[i
],"-reffastat") == 0 || strcmp(argv
[i
],"-rfs") == 0)
2424 cout
<<"Parameter error on \""<<argv
[i
]<<"\""<<endl
;
2428 cout
<<endl
<<argv
[i
]<<' ';
2430 cout
<<endl
<<"# End of parameters list"<<endl
<<endl
;
2432 //Parameter Logicals
2433 if((cvg
&& phy
) || (!cvg
&& !phy
))
2435 cerr
<<"At least and only one of the \"-cvg\", \"-phy\" or \"-tag\" should be selected!"<<endl
;
2440 cerr
<<"-pslsub should only be used with -cvg other than -phy!"<<endl
;
2445 cerr
<<"-pslquery should only be used with -cvg other than -phy!"<<endl
;
2450 cerr
<<"-maq should only be used with -cvg other than -phy!"<<endl
;
2455 cerr
<<"-m8 should only be used with -cvg other than -phy!"<<endl
;
2458 if(depth
&& depthsingle
)
2460 cerr
<<"Only one of the \"-depth\" or \"-depthsingle\" could be selected!"<<endl
;
2463 if(depthsinglebin
&& depthsingle
)
2465 cerr
<<"Only one of the \"-depthsinglebin\" or \"-depthsingle\" could be selected!"<<endl
;
2470 cerr<<"\"-o\" should be defined!"<<endl;
2473 if(duplicate
&& depthinput
)
2475 cerr
<<"\"-duplication\" can't be used with \"-depthinput\"!"<<endl
;
2478 if(sp
&& !pesupport
&& (!il_single
|| !il_soap
))
2480 cerr
<<"\"-sp\" should be used with \"-il_single\" & \"il_soap\"!"<<endl
;
2483 if(sp
&& phy
&& !pesupport
)
2485 cerr
<<"\"-sp\" should not be used with \"-phy\""<<endl
;
2488 if(!phy
&& pesupport
)
2490 cerr
<<"\"-pesupport\" should be used with \"-phy\""<<endl
;
2498 //Opening General Output file
2500 cerr
<<"General output (-o) undefined, disabled!"<<endl
;
2503 OutData
.open(oPos
.c_str());
2506 cerr
<<"Error opening output file!"<<endl
;
2516 igzstream
InData(rPos
.c_str());
2519 cerr
<<"Error opening Reference file!";
2522 cout
<<"Picking out segments from reference file..."<<endl
2523 <<"Number of segments:"<<setw(10)<<'0'<<flush
;
2524 for(register uint
i(0); InData
;++i
)
2526 ulong
seg_length(0);
2528 getline(InData
, ctemp
, '\n');
2532 temp
=temp
.substr(1,(temp
.find_first_of(" \t")-1));
2535 getline(InData
, ctemp
, '\n');
2537 if(ctemp
[0] == '>') break;
2538 seg_length
+= ctemp
.size();
2540 totalBP
+= (seg_length
- trim
* 2);
2541 if(!nowarning
&& refsingle_marker
.find(temp
) != refsingle_marker
.end())
2543 cerr
<<"Error! There are repeating segment name in reference!"<<endl
;
2546 refsingle_marker
.insert(make_pair
<string
, ulong
>(temp
, seg_length
+ 1));
2547 cout
<<"\b\b\b\b\b\b\b\b\b\b"
2548 <<setw(10)<<refsingle_marker
.size()<<flush
;
2552 cerr
<<"Marker \">\" not found in line "<<ctemp
<<"in refsingle!"<<endl
;
2566 igzstream
InData(rPos
.c_str());
2569 cerr
<<"Error opening Reference file!";
2572 cout
<<"Picking out segments from FaStat file..."<<endl
2573 <<"Number of segments:"<<setw(10)<<'0'<<flush
;
2574 for(register uint
i(0); InData
;++i
)
2576 string name
, size
, size_withoutn
;
2579 InData
>>name
>>size
>>size_withoutn
;
2582 //name=name.substr(1,(temp.find_first_of(" \t")-1));
2583 totalBP
+= (atol(size
.c_str()) - trim
* 2);
2584 if(!nowarning
&& refsingle_marker
.find(temp
) != refsingle_marker
.end())
2586 cerr
<<"Error! There are repeating segment name in FaStat!"<<endl
;
2589 refsingle_marker
.insert(make_pair
<string
, ulong
>(name
, atol(size
.c_str()) + 1));
2590 cout
<<"\b\b\b\b\b\b\b\b\b\b"
2591 <<setw(10)<<refsingle_marker
.size()<<flush
;
2603 cout
<<"Output List:"<<endl
;
2604 cout
<<setw(2)<<i
++<<": "<<oPos
<<endl
;
2606 cout
<<setw(2)<<i
++<<": "<<dPos
<<"/*"<<endl
;
2608 cout
<<setw(2)<<i
++<<": "<<dPos
<<endl
;
2610 cout
<<setw(2)<<i
++<<": "<<cdsPos_out
<<endl
;
2613 void ComfirmAllDigit(const char* rhs
)
2615 for(register uint
i(0); i
<strlen(rhs
); ++i
)
2617 if(!isdigit(rhs
[i
]))
2619 cerr
<<rhs
<<" is not a number!"<<endl
;
2625 inline uint
SeperateCigar(string
& str
, vector
<uint
> & vec
)
2630 for(register int i(0); i
< str
.size(); ++i
)
2632 if(isalpha(str
[i
]) || str
[i
] == '-')
2635 accumulate
+= atoi(pos
.c_str());
2636 vec
.push_back(accumulate
+ character
);