modified: nfig1.py
[GalaxyCodeBases.git] / BGI / SOAPcoverage / soap.coverage.cpp
bloba401f3196d3ca9a0ba92ed9a764954507ef42503
1 /*
2 Enchantez!
4 Soap.Coverage
6 Author: Aqua Lo
7 E-mail: luoruibang@genomics.org.cn
9 This utility if a component of SOAP.
12 #define _REENTRANT
13 #include <iostream>
14 #include <iomanip>
15 #include <fstream>
16 #include <vector>
17 #include <set>
18 #include <map>
19 #include <string>
20 #include <sstream>
21 #include <cstring>
22 #include <cstdlib>
23 #include <cctype>
24 #include <cmath>
25 #include <cassert>
26 #include <limits>
27 #include <pthread.h>
28 #include <boost/lexical_cast.hpp>
29 #include <boost/progress.hpp>
30 #include <boost/regex.hpp>
31 #include <boost/unordered_set.hpp>
32 #include <boost/unordered_map.hpp>
33 #include "gzstream.h"
34 #include "threadmanager.h"
37 //Namespace Declaration
38 using namespace std;
39 using namespace boost;
41 //Constant
42 #define DEPTH_SINGLE_LENGTH_PER_ROW 100 //Columns per row in -depthsingle
43 #define WINDOW_SIZE_LOWER_LIMIT 100 //The lowest threshold of window_size
44 //Debug Preparations
45 #define DEBUG false;
46 #define DB(...) if(DEBUG) cerr<<"Debug:"<<'\t'<<__FILE__<<'\t'<<__LINE__<<'\t'<<(__VA_ARGS__)<<end;
49 //Input Method
50 typedef igzstream imethod;
52 //Program Label
53 #define PROGRAM_NAME "SOAP.coverage"
54 #define PROGRAM_COMPILE_DATE __DATE__
55 #define PROGRAM_COMPILE_TIME __TIME__
57 // Define version number
58 #define VERSION_MAJOR 2
59 #define VERSION_MINOR 7
60 #define VERSION_PATCHLEVEL 9
61 #define VERSION_STRING "2.7.9"
63 // Macros dealing with VERSION
64 #define VERSION_NUM(a,b,c) (((a) << 16L) | ((b) << 8) | (c))
65 #define VERSION \
66 VERSION_NUM(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCHLEVEL)
68 #define README \
69 "This utility can calculate sequencing coverage or physical coverage as well as duplication rate\n"\
70 "and details of specific block for each segments and whole genome by using SOAP, Blat, Blast, BlastZ,\n"\
71 "mummer and MAQ aligement results with multi-thread. Gzip file supported.\n"
73 #define USAGE \
74 "Parameters:\n"\
75 " -cvg or -phy or -tag Selector for sequencing coverage mode, physical coverage mode or reads tag mode\n"\
76 " At least and only one should be selected!\n"\
77 " -refsingle [filename] Input reference fasta file used in SOAP\n"\
78 " -i [soap-file1 soap-file2 ...]\n"\
79 " Input several soap or soap gziped results by filenames.\n"\
80 " -il [soap-list] Input several soap or soap gziped results (absolute path!) with a soap-list file\n"\
81 " Caution: Only PE aligned results can be used in physical coverage!\n"\
82 " -il_single [SE aligned results list]\n"\
83 " -il_soap [PE aligned results list]\n"\
84 " -o [file-name] Results output with details\n"\
85 " -depth [directory] Output coverage of each bp in seperate files, a directory should be given\n"\
86 " -depthsingle [filename] Output coverage of each bp in a single file (text, fasta like)\n"\
87 " -nospace No space between plain depthsingle site depths\n"\
88 " -depthsinglebin [fn] Output coverage of each bp in a single file (Binary mode)\n"\
89 " -addn [filename] Input N block data for exclusion (marked as 65535 in depthsingle output)\n"\
90 " Input format: <segment_name> <start (leftmost as 1)> <end (half close)>\n"\
91 " -depthinput [filename] Input previous coverage data (Both Text or Binary) for faster accumulation\n"\
92 " -depthinputsam [filename]\n"\
93 " Input previous coverage data from Samtools\n"\
94 " -cdsinput [filename] Input specific block range for calculating coverage\n"\
95 " Input format: <segment_name> <start (leftmost as 1)> <end (half close)>\n"\
96 " -plot [filename] [x-axis lower] [x-axis upper]\n"\
97 " Output overall distribution of coverage of all segments\n"\
98 " -cdsplot [filename] [x-axis lower] [x-axis upper]\n"\
99 " Output distribution of coverage of specific blocks\n"\
100 " -cdsdetail [filename]\n"\
101 " Output coverage of each bp of each specific blocks in a single file\n"\
102 " -window [filename] [length]\n"\
103 " Output coverage averaged in a [length] long window to [filename]\n"\
104 " -p [num] Number of processors [Default:4]\n"\
105 " -trim [num] Exclude [num] bp(s) from head & tail of each segments\n"\
106 " -precisionOffset [num] Set the precision offset [Default:2]\n"\
107 " -minimumDepth [num] Set the minimum depth to take into consideration [Default:0]\n"\
108 " -maximumDepth [num] Set the maximum depth to take into consideration [Default:65535]\n"\
109 "\n"\
110 "Input format seletors:\n"\
111 " -general [id column] [start position column] [end position column]\n"\
112 " -generallen [id column] [start position column] [len]\n"\
113 " Specify column's (leftmost 1 based, right half close) for necessary informations\n"\
114 " Warning: Inputs with /^#/ are recognized as comments and ignored\n"\
115 " -plain Input is a three column list\n"\
116 " -sam Input is a standard SAM input file\n"\
117 " -pslquery Input is Blat for alculating query coverage.\n"\
118 " -pslsub Input is Blat for calculating subject coverage.\n"\
119 " -maq Input is MAQ output file.\n"\
120 " -m8subject Input is Blast m8 file for calculating subject coverage (reference should be subject).\n"\
121 " -m8query Input is Blast m8 file for calculating query coverage (reference should be query).\n"\
122 " -mummerquery [limit] Input mummer result file for calculating query coverage.\n"\
123 " -axtoitg Input Blastz axt file for calculating target coverage.\n"\
124 " -axtoiq Input Blastz axt file for calculating query coverage.\n"\
125 " -qmap Input qmap of bisulfite sequencing.\n"\
126 "\n"\
127 "Special functions:\n"\
128 " -sp [filename_in] [filename_out]\n"\
129 " Output S/P ratio data for post processing.\n"\
130 " Column:\n"\
131 " ref start end name\n"\
132 " -pesupport [filename_in] [filename_out]\n"\
133 " Output pair-end reads on specific areas.\n"\
134 " Column:\n"\
135 " ref start end name\n"\
136 " -onlyuniq Use reads those are uniquely mapped (column 4 in soap == 1).\n"\
137 " -precise Omit mismatched bp in soap results.\n"\
138 " -nowarning Cancel all possible warning.\n"\
139 " -nocalc Do not perform depth calculation.\n"\
140 " -onlycover Only output 0 or 1 for coverage calculation.\n"\
141 "\n"\
142 "Physical Coverage Specified Parameters:\n"\
143 " -duplicate [num] Exclude duplications, and gives the percentage of duplication. [num]=readlength\n"\
144 " -insertupper [num] Insert larger than num will be abandon [Default: 15000]\n"\
145 " -insertlower [num] Insert shorter thab num will be abandon [Default: 0]\n"\
146 "\n"\
147 "Example:\n"\
148 " 1. Calculate several files of SOAP results.\n"\
149 " soap.coverage -cvg -i *.soap *.single -refsingle human.fa -o result.txt \n"\
150 "\n"\
151 " 2. Calculate a list of SOAP results, exclude Ns blocks, output depth to\n"\
152 " a file and plot coverage form depth 0 to 1000.\n"\
153 " soap.coverage -cvg -il soap.list -refsingle human.fa -o result.txt -depthsingle all.coverage -addn n.gap -plot distribution.txt 0 1000\n"\
154 "\n"\
155 " 3. Calculate a list of SOAP results, use only uniquely mapped reads, exclude Ns blocks\n"\
156 " , output depth to a file and plot coverage form depth 0 to 1000.\n"\
157 " soap.coverage -cvg -il soap.list -onlyuniq -refsingle human.fa -o result.txt -depthsingle all.coverage -addn n.gap -plot distribution.txt 0 1000\n"\
158 "\n"\
159 " 4. Add new SOAP results to depth(-depthsingle) already calculated &\n"\
160 " plot all data and specific blocks from depth 0 to 150, with 6 processors.\n"\
161 " soap.coverage -cvg -depthinput all.coverage -refsingle human.fa -il soap.list -p 6 -o result.txt -cdsinput cds.list -plot distribution.txt 0 150 -cdsplot distribution_cds.txt 0 150\n"\
162 "\n"\
163 " 5. Calculate physical coverage and duplication rate(read length=44) with\n"\
164 " insert between (avg-3SD, avg+SD)[avg=197, SD=9], with 8 processors\n"\
165 " soap.coverage -phy -il soap_without_single.list -refsingle human.fa -p 8 -o result.txt -duplicate 44 -insertlower 170 -insertupper 224\n"\
167 //Typedef
168 typedef unsigned short ushort;
169 typedef unsigned int uint;
170 typedef unsigned long ulong;
171 typedef unordered_set<string> strsets;
172 typedef unordered_map<string,int> strmaps;
173 typedef unordered_map<string,pthread_mutex_t*> semaphore_maps;
174 typedef vector<ushort> vs;
175 typedef vector<uint> vi;
176 typedef vector<pair<string, uint> > vpsu;
178 //Functions Declaration
179 void Intro(); //Program Head
180 void ParaDeal(int&, char**); //Dealing with parameters
181 void MakeMaps(); //Segments Shadowing form sets to maps
182 void BuildMemory(); //Alloc Memory
183 inline ulong AddTotalBP(); //Use by BuildMemory(), cumulate quantity info in ref.
184 inline ulong AddTotalBP_refsingle(); //Use by BuildMemory(), cumulate quantity info in refsingle.
185 void AddCover(); //-cvg
186 void Physical_AddCover(); //-phy
187 void CalcCoverage();
188 void OutputCoverage(); //-depthsingle
189 void OutputCoverageBin(); //-depthsinglebin
190 void OutputDistribution(); //-plot
191 void OutputWindow(); //-window
192 void AddN(); //-addn
193 void CalcCDS(); //-cdsplot
194 void CalcPoint(); //-point
195 void SP(); //-sp
196 void SPOutput();
197 void PESupportOutput(); //-pesupport
198 void AddCvg(); //-depthinput
199 void AddCvgFromSamtools(); //-depthinputsam
200 void AddRefSingle(); //-refsingle
201 void AddFastat(); //-reffastat
202 void Output_list(); //Sub of result illustration
203 void ComfirmAllDigit(const char*); //Sub of ParaDeal()
204 inline uint SeperateCigar(string &, vector<uint> &); //Sub of _AddCover_Sub
206 //Global vars
207 bool cvg(false), phy(false), tag(false);
208 bool depth(false), depthsingle(false), nospace(false), depthsinglebin(false), ref(false), refsingle(false), reffastat(false),depthinput(false),depthinputsam(false),\
209 cdsinput(false), cdsplot(false), addon(false), addN(false), plot(false),cdsdetail(false), sam(false),\
210 window(false), pslsub(false), pslquery(false), maq(false), m8subject(false), m8query(false), axtoitg(false), axtoiq(false),\
211 nowarning(false), sp(false), il_single(false), il_soap(false), pesupport(false), onlyuniq(false),\
212 precise(false), gz(false), mummerquery(false), nocalc(false), onlycover(false), plain(false), qmap(false), general(false),\
213 generalLen(false);
214 map<string, ulong> refsingle_marker;
215 int trim(0), sprd_length(0), mummer_limit(0);
216 uint length(0);
217 vector<string> iPos, isPos, ipPos;
218 string oPos, rPos, dPos, cPos, cdsPos, cdsPos_out, cdsdetailPos, pointPos, pointOut, nPos, plotPos, windowPos_out, spIn, spOut;
219 uint plot_upper(0), plot_lower(0), cds_plot_upper(0), cds_plot_lower(0);
220 uint window_size(0);
221 ulong totalBP(0),countBP(0);
222 uint insert_Limit(15000);
223 uint insert_LowerLimit(0);
224 uint duplicate(0), useful_data(0), useless_data(0);
225 uint precisionOffset(6);
226 ushort minimumDepth(0);
227 ushort maximumDepth(65535);
228 string temp; //Quick inter-funtions messenger for BuildMemory().
229 ofstream OutData; //Global output handler
230 strsets chr;
231 strmaps chrmaps;
232 semaphore_maps chr_sema;
233 vector<vs> vector_vs;
234 vector<vi> sp_vvs;
235 vpsu sp_sui_single;
236 vpsu sp_sui_soap;
237 vector<int> distribution(65536,0);
238 uint intmax(numeric_limits<int>::max());
239 uint idCol(1), startCol(2), endCol(3);
241 //Multi-Thread global var
242 int NUM_THREADS(4);
243 void* _AddCover_general(void*);
244 void* _AddCover_plain(void*);
245 void* _AddCover_sam(void*);
246 void* _AddCover_Sub(void*);
247 void* _AddCover_pslsub(void*);
248 void* _AddCover_pslquery(void*);
249 void* _AddCover_maq(void*);
250 void* _AddCover_m8subject(void*);
251 void* _AddCover_m8query(void*);
252 void* _AddCover_mummerquery(void*);
253 void* _AddCover_axtoitg(void*);
254 void* _AddCover_axtoiq(void*);
255 void* _AddCover_qmap(void*);
256 void* _Physical_AddCover_Sub(void*);
258 struct param_struct
260 string pos;
263 int main(int argc, char ** argv)
265 timer elapsedtime;
267 Intro();
269 ParaDeal(argc, argv);
271 MakeMaps();
273 BuildMemory();
275 if(sp) SP();
277 if(depthinput) AddCvg();
278 if(depthinputsam) AddCvgFromSamtools();
279 if(addN) AddN();
280 if(addon && cvg) AddCover();
281 if(addon && phy) Physical_AddCover();
283 if(sp && !pesupport) SPOutput();
284 if(sp && pesupport) PESupportOutput();
286 if(!nocalc) CalcCoverage();
288 OutData<<"Overall:"<<endl
289 <<"Total:"<<totalBP<<endl
290 <<"Covered:"<<countBP<<endl
291 <<"Percentage:"<<(double)countBP/totalBP*100<<endl<<endl;
293 if(duplicate)
294 OutData<<"Duplication rate:"<<(double)useless_data/(useful_data+useless_data)*100<<'%'<<endl;
296 if(plot) OutputDistribution();
298 if(window) OutputWindow();
300 if(depth || depthsingle) OutputCoverage();
302 if(depthsinglebin) OutputCoverageBin();
304 if(cdsinput) CalcCDS();
306 if(!pointPos.empty() && !pointOut.empty()) CalcPoint();
308 cout<<"Overall:"<<endl
309 <<"Total:"<<totalBP<<endl
310 <<"Covered:"<<countBP<<endl
311 <<"Percentage:"<<(double)countBP/totalBP*100<<endl;
313 if(duplicate)
314 cout<<"Duplication rate:"<<(double)useless_data/(useful_data+useless_data)*100<<'%'<<endl;
316 Output_list();
318 cout<<(elapsedtime.elapsed())<<"s Elapsed!"<<endl;
320 return EXIT_SUCCESS;
323 void Intro()
325 cerr<<endl;
326 cerr<<PROGRAM_NAME<<endl;
327 cerr<<" Version: "<<VERSION_STRING<<endl;
328 cerr<<"Complied at: "<<PROGRAM_COMPILE_DATE<<' '<<PROGRAM_COMPILE_TIME<<endl;
329 cerr<<" Author: RuiBang Luo"<<endl;
330 cerr<<" E-mail: luoruibang@genomics.org.cn"<<endl<<endl;
331 cerr<<README<<endl;
334 void OutputWindow()
336 if(window_size < WINDOW_SIZE_LOWER_LIMIT)
338 cerr<<"User defined window length is lower than the lowest threshold "<<WINDOW_SIZE_LOWER_LIMIT<<endl
339 <<"Window length will set to "<<WINDOW_SIZE_LOWER_LIMIT<<endl;
340 window_size = WINDOW_SIZE_LOWER_LIMIT;
342 progress_display ptcount(chrmaps.size(), cout, "Calculating Coverage in window...\n");
343 ofstream O(windowPos_out.c_str());
344 if(!O)
346 cerr<<"Error opening window output file: "<<windowPos_out<<endl;
347 return;
349 strmaps::iterator mpos;
350 for(mpos = chrmaps.begin(); mpos != chrmaps.end(); ++mpos)
352 ++ptcount;
353 for(register int i(1),limit(vector_vs[mpos->second].size()); i<limit;)
355 register int BPTotal(0);
356 register int BPCount(0);
357 register int j( (i+window_size) < limit ? i+window_size : limit);
358 if(j - i < window_size * 0.5)
359 break;
360 for(; i<j ; ++i)
362 if(vector_vs[mpos->second][i]==65535)
363 continue;
364 if(vector_vs[mpos->second][i] < minimumDepth)
365 continue;
366 if(vector_vs[mpos->second][i] > maximumDepth)
367 continue;
368 BPTotal += vector_vs[mpos->second][i];
369 ++BPCount;
371 O<<mpos->first<<'\t'<<i-window_size<<'\t'<<j-1<<'\t'<<BPCount<<'\t'<<(double)BPTotal / BPCount<<endl;
376 void Physical_AddCover()
378 cout<<"Summarizing Physical Coverage..."<<endl;
379 CThreadManager threadMgr(NUM_THREADS);
380 threadMgr.SetTimer(0.5);
381 vector<param_struct> vparam(iPos.size());
382 for(register uint i(0); i<iPos.size(); ++i)
384 vparam[i].pos = iPos[i];
385 threadMgr.AddThread( _Physical_AddCover_Sub, (void*)&vparam[i]);
387 threadMgr.Run();
390 void* _Physical_AddCover_Sub(void* param)
392 param_struct* pParam = (param_struct*) param;
393 static ushort progress(1);
394 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos;
395 if((pParam->pos).find(".single") != string::npos)
397 cout<<" Omitted!"<<endl;
398 return (void *) NULL;
400 else
401 cout<<endl;
402 string temp;
403 string ctemp, ctemp1, ctemp2, ctemp3, ctemp4, ctemp5, ctemp6, ctemp_pos;
404 strmaps::iterator mpos;
405 semaphore_maps::iterator semapos;
406 imethod InData_Sub;
407 InData_Sub.open(pParam->pos.c_str());
408 //Var for duplication
409 multimap<uint,uint> dup_atom;
410 vector<multimap<uint,uint> > dup_vector(chrmaps.size());
411 //multimap<uint,uint>::iterator p_dup_atom;
412 pair<multimap<uint,uint>::iterator , multimap<uint,uint>::iterator > p_dup_atom_pair;
413 //***************************************
414 while(InData_Sub)
416 register uint end(0),start(0),read_length_neg(0),read_length_pos(0), start_dup(0), end_dup(0);
417 strmaps::iterator mpos;
418 for(;InData_Sub;)
420 ++useful_data;
421 bool continue_mark(false);
422 //- 6 for length
423 InData_Sub>>ctemp;InData_Sub>>ctemp;InData_Sub>>ctemp;InData_Sub>>ctemp;InData_Sub>>ctemp;InData_Sub>>ctemp5;
424 //- 7 for strand
425 InData_Sub>>ctemp_pos;
426 //- 8 for chr
427 InData_Sub>>ctemp1;
428 //- 9 for position
429 InData_Sub>>ctemp2;InData_Sub.ignore(100000,'\n');
430 //+ 6 for length
431 InData_Sub>>ctemp;InData_Sub>>ctemp;InData_Sub>>ctemp;InData_Sub>>ctemp;InData_Sub>>ctemp;InData_Sub>>ctemp6;
432 //+ 8 for chr
433 InData_Sub>>ctemp;InData_Sub>>ctemp3;
434 //+ 9 for position
435 InData_Sub>>ctemp4;InData_Sub.ignore(100000,'\n');
437 //Judgement
438 if(ctemp1 != ctemp3)
440 getline(InData_Sub, ctemp, '\n');
441 cerr<<"Unmatched chromosome context!"<<"\t"<<ctemp1<<"\t"<<ctemp3<<endl;
442 continue;
444 if(atoi(ctemp2.c_str()) > atoi(ctemp4.c_str()))
446 swap(ctemp5, ctemp6);
447 swap(ctemp2, ctemp4);
449 read_length_neg = atoi(ctemp6.c_str());
450 read_length_pos = atoi(ctemp5.c_str());
451 if(read_length_neg == 0 || read_length_pos == 0 || ctemp2.size() > 9 || ctemp4.size() > 9)
453 cerr<<"One row bypassed!"<<endl;
454 continue;
456 start = atoi(ctemp2.c_str());
457 end = atoi(ctemp4.c_str()) + read_length_neg - 1;
458 if(end<=start)
460 cerr<<"end<=start"<<endl;
461 continue;
463 if((end-start)>insert_Limit || (end-start)<insert_LowerLimit) continue;
464 temp = ctemp1;
465 mpos = chrmaps.find(temp); semapos = chr_sema.find(temp);
466 if(mpos == chrmaps.end()) continue;
467 if (duplicate)
469 start_dup = start; end_dup = end;
470 if(read_length_neg < duplicate)
472 start_dup = start_dup - (duplicate - read_length_neg -1);
474 if(read_length_pos < duplicate)
476 end_dup = atoi(ctemp4.c_str()) + duplicate - 2;
478 p_dup_atom_pair = dup_vector[mpos->second].equal_range(start_dup);
479 if (p_dup_atom_pair.first == p_dup_atom_pair.second)
481 dup_vector[mpos->second].insert(make_pair<uint, uint>(start_dup, end_dup));
483 else
485 for(;p_dup_atom_pair.first != p_dup_atom_pair.second; ++p_dup_atom_pair.first)
487 if(p_dup_atom_pair.first->second == end_dup)
489 ++useless_data;
490 continue_mark = true;
491 break;
494 if (!continue_mark) dup_vector[mpos->second].insert(make_pair<uint, uint>(start_dup, end_dup));
497 if (continue_mark) continue;
499 break;
501 if(start >= vector_vs[mpos->second].size()) continue;
502 if(end >= vector_vs[mpos->second].size()) end = vector_vs[mpos->second].size() - 1;
503 if(pesupport)
505 int cache(0);
506 for(register size_t i(start); i <= end; ++i)
508 if(sp_vvs[mpos->second][i] == 0)
509 continue;
510 if(sp_vvs[mpos->second][i] == cache)
511 continue;
512 cache = sp_vvs[mpos->second][i];
513 ++(sp_sui_soap[sp_vvs[mpos->second][i]].second);
514 sp_sui_soap[sp_vvs[mpos->second][i]].first += ("\t" + lexical_cast<string>(end - start + 1 - read_length_neg - read_length_pos));
518 if(!InData_Sub) break;
519 if(start >= vector_vs[mpos->second].size()) continue;
520 if(end >= vector_vs[mpos->second].size()) end = vector_vs[mpos->second].size()-1;
521 for(register uint j(start),k(end); j<=k; ++j)
522 ++(vector_vs[mpos->second][j]);
526 void OutputDistribution()
528 ofstream O(plotPos.c_str());
529 if(!O)
531 cerr<<"Error opening plot output file: "<<plotPos<<endl;
532 exit(EXIT_FAILURE);
534 for(register int i(plot_lower); i<=plot_upper; ++i)
535 //if(distribution[i])
536 O<<setw(5)<<i<<setw(12)<<distribution[i]<<endl;
539 void OutputCoverage()
541 progress_display ptcount(chrmaps.size(), cout, "Output Coverage to files (Text)...\n");
542 string temp;
543 strmaps::iterator mpos;
544 string delim = nospace ? "": " ";
545 if (depth)
547 for(mpos = chrmaps.begin(); mpos != chrmaps.end(); ++mpos)
549 ++ptcount;
550 temp = dPos + '/'+ mpos->first + ".coverage";
551 ofstream depthData(temp.c_str());
552 depthData<<'>'<<mpos->first<<"\n";
554 //Determine Length
555 length = DEPTH_SINGLE_LENGTH_PER_ROW;
557 if(onlycover)
559 for(register unsigned long i(1),limit(vector_vs[mpos->second].size()); i<limit; ++i)
561 if(vector_vs[mpos->second][i])
562 depthData<<1<<delim;
563 else
564 depthData<<0<<delim;
565 if (i && i%length == 0) depthData<<'\n';
568 else
570 for(register unsigned long i(1),limit(vector_vs[mpos->second].size()); i<limit; ++i)
572 depthData<<vector_vs[mpos->second][i]<<delim;
573 if (i && i%length == 0) depthData<<'\n';
578 else if(depthsingle)
580 length = DEPTH_SINGLE_LENGTH_PER_ROW;
581 temp = dPos;
582 ofstream depthData(temp.c_str());
583 for(mpos = chrmaps.begin(); mpos != chrmaps.end(); ++mpos)
585 ++ptcount;
586 depthData<<'>'<<mpos->first<<"\n";
587 if(onlycover)
589 for(register unsigned long i(1),limit(vector_vs[mpos->second].size()); i<limit; ++i)
591 if(vector_vs[mpos->second][i])
592 depthData<<1<<delim;
593 else
594 depthData<<0<<delim;
595 if (i && i%length == 0) depthData<<'\n';
598 else
600 for(register unsigned long i(1),limit(vector_vs[mpos->second].size()); i<limit; ++i)
602 depthData<<vector_vs[mpos->second][i]<<delim;
603 if (i && i%length == 0) depthData<<'\n';
606 depthData<<endl;
611 void OutputCoverageBin()
613 progress_display ptcount(chrmaps.size(), cout, "Output Coverage to files (Binary)...\n");
614 strmaps::iterator mpos;
615 ofstream depthData(dPos.c_str(), ios_base::binary | ios_base::out | ios_base::trunc);
616 ulong len_tmp(vector_vs.size());
617 //Output the quantity of segments
618 depthData.write((char*)(&len_tmp), sizeof(ulong));
621 for(mpos = chrmaps.begin(); mpos != chrmaps.end(); ++mpos)
623 ++ptcount;
624 //Output the length of the name
625 len_tmp = mpos->first.size();
626 depthData.write((char*)(&len_tmp), sizeof(ulong));
627 //Output the name
628 depthData.write(mpos->first.c_str(), mpos->first.size());
629 //Output the length of the segment
630 len_tmp = (vector_vs[mpos->second].size()-1);
631 depthData.write((char*)(&len_tmp), sizeof(ulong));
632 if(onlycover)
634 ushort s1(1);
635 ushort s0(0);
636 for(register unsigned long i(1),limit(vector_vs[mpos->second].size()); i<limit; ++i)
637 //Output the single point detail
639 if(vector_vs[mpos->second][i])
640 depthData.write((char*)(&s1), sizeof(ushort));
641 else
642 depthData.write((char*)(&s0), sizeof(ushort));
645 else
647 for(register unsigned long i(1),limit(vector_vs[mpos->second].size()); i<limit; ++i)
648 //Output the single point detail
649 depthData.write((char*)(&vector_vs[mpos->second][i]), sizeof(ushort));
654 void CalcCoverage()
656 progress_display ptcount(chrmaps.size(), cout, "Calculating Coverage...\n");
657 //OutData<<"Coverage of each:\n";
658 strmaps::iterator mpos;
659 for(mpos = chrmaps.begin(); mpos != chrmaps.end(); ++mpos)
661 OutData<<mpos->first<<": ";
662 int singleBP(0);
663 int effectiveBP(0);
664 ulong depth_of_seq(0);
665 for(register int i(1),limit(vector_vs[mpos->second].size()); i<limit; ++i)
667 if(i<trim && i>=(limit-trim))
668 continue;
669 if(vector_vs[mpos->second][i]==65535)
670 continue;
671 ++effectiveBP;
672 if(vector_vs[mpos->second][i] < minimumDepth)
673 continue;
674 if(vector_vs[mpos->second][i] > maximumDepth)
675 continue;
676 if(vector_vs[mpos->second][i])
678 depth_of_seq += vector_vs[mpos->second][i];
679 ++singleBP;
681 ++distribution[vector_vs[mpos->second][i]];
683 countBP += singleBP;
684 OutData<<singleBP<<'/'<<effectiveBP<<"\t"<<setprecision(precisionOffset)<<((double)singleBP/effectiveBP*100)<<"\t"<<setprecision(precisionOffset)<<depth_of_seq/(double)singleBP<<endl;
685 ++ptcount;
687 OutData<<endl;
690 void SP()
692 cout<<"Marking S/P ratio area..."<<endl;
693 //cout<<"Int Max: "<<intmax<<endl;
694 strmaps::iterator mpos;
695 ifstream SPIN(spIn.c_str());
696 if(!SPIN)
698 cerr<<"Error opening S/P ratio input file: "<<spIn<<endl;
699 exit(EXIT_FAILURE);
701 string stemp;
702 int mark(1);
703 if(!pesupport) sp_sui_single.push_back(make_pair<string, uint>("", 0));
704 sp_sui_soap.push_back(make_pair<string, uint>("", 0));
705 for(;;)
707 assert(mark == sp_sui_soap.size());
708 string stemp1;
709 uint itemp2(0),itemp3(0);
710 SPIN>>stemp1>>itemp2>>itemp3;
711 if(!SPIN) break;
712 mpos = chrmaps.find(stemp1);
713 if(mpos == chrmaps.end())
715 cerr<<stemp1<<" from S/P ratio input not found in reference!"<<endl;
716 exit(EXIT_FAILURE);
718 if(!(mark == sp_sui_single.size() && sp_sui_single.size() == sp_sui_soap.size()) && !pesupport)
720 cerr<<"!(mark == sp_sui_single.size() && sp_sui_single.size() == sp_sui_soap.size() && !pesupport)"<<endl;
721 exit(EXIT_FAILURE);
723 if(!pesupport) sp_sui_single.push_back(make_pair<string, uint>(stemp1 + "\t" + lexical_cast<string>(itemp2) + "\t" + lexical_cast<string>(itemp3), 0));
724 sp_sui_soap.push_back(make_pair<string, uint>(stemp1 + "\t" + lexical_cast<string>(itemp2) + "\t" + lexical_cast<string>(itemp3), 0));
725 if(itemp3 >= sp_vvs[mpos->second].size())
726 itemp3 = sp_vvs[mpos->second].size() - 1;
727 for(register uint j(itemp2); j<=itemp3; ++j)
728 sp_vvs[mpos->second][j] = mark;
729 ++mark;
731 cerr<<"Size of S/P ratio segments: "<<sp_sui_soap.size()<<endl;
732 cerr<<"Size of S/P ratio scaffolds: "<<sp_vvs.size()<<endl;
735 void SPOutput()
737 ofstream O(spOut.c_str());
738 if(!O)
740 cerr<<"Error opening S/P ratio output file!"<<endl;
741 exit(EXIT_FAILURE);
743 for(register size_t i(1); i < sp_sui_single.size(); ++i)
744 O<<sp_sui_single[i].first<<'\t'<<sp_sui_single[i].second<<'\t'<<sp_sui_soap[i].second<<'\t'<<(sp_sui_soap[i].second?(double)sp_sui_single[i].second/sp_sui_soap[i].second:0)<<endl;
747 void PESupportOutput()
749 ofstream O(spOut.c_str());
750 if(!O)
752 cerr<<"Error opening SPE support output file!"<<endl;
753 exit(EXIT_FAILURE);
755 for(register size_t i(1); i < sp_sui_soap.size(); ++i)
756 O<<sp_sui_soap[i].second<<'\t'<<sp_sui_soap[i].first<<endl;
759 void CalcCDS()
761 cout<<"Summarizing CDS Coverage..."<<endl;
762 strmaps::iterator mpos;
763 ifstream CDS(cdsPos.c_str());
764 if(!CDS)
766 cerr<<"Error opening cds input file: "<<cdsPos<<endl;
767 exit(EXIT_FAILURE);
769 ofstream CDSO;
770 if(cdsplot)
772 CDSO.open(cdsPos_out.c_str());
773 if(!CDSO)
775 cerr<<"Error opening cds output file: "<<cdsPos_out<<endl;
776 exit(EXIT_FAILURE);
779 ofstream CDS_DETAIL;
780 if(cdsdetail)
781 CDS_DETAIL.open(cdsdetailPos.c_str());
782 if(cdsdetail && !CDS_DETAIL)
784 cerr<<"Error opening cds details output file: "<<cdsdetailPos<<endl;
785 exit(EXIT_FAILURE);
787 string stemp;
788 vector<int> cds_distribution(65536,0);
789 int cds_total(0);
790 int cds_covered(0);
791 string stemp1,stemp2,stemp3,name;
792 for(;;)
794 CDS>>stemp1>>stemp2>>stemp3;
795 if(!CDS) break;
796 name = stemp1 + "\t" + stemp2 + "\t" + stemp3;
797 int itemp2(0),itemp3(0);
798 itemp2 = atoi(stemp2.c_str());
799 itemp3 = atoi(stemp3.c_str());
800 mpos = chrmaps.find(stemp1);
801 if(mpos == chrmaps.end()) continue;
802 stringstream ss;
803 //int count(0);
804 int cvg_total(0);
805 int cvs_covered_sub(0);
806 for(register uint j(itemp2); j<=itemp3; ++j)
808 if(j < 1 || j>= vector_vs[mpos->second].size())
810 cerr<<stemp1<<'\t'<<stemp2<<'\t'<<stemp3<<'\t'<<" Overflow! Omitted."<<endl;
811 break;
813 if(vector_vs[mpos->second][j] != 65535 && vector_vs[mpos->second][j] > minimumDepth && vector_vs[mpos->second][j] < maximumDepth)
815 cvg_total+=vector_vs[mpos->second][j];
816 ++cds_total;
817 if(vector_vs[mpos->second][j])
819 ++cvs_covered_sub;
820 ++cds_covered;
822 //vector_vs[mpos->second][j] = 65535;
824 if(cdsdetail) ss<<vector_vs[mpos->second][j]<<' ';
825 ++cds_distribution[vector_vs[mpos->second][j]];
827 if(cdsdetail)
829 CDS_DETAIL<<">"<<name<<'\t'<<itemp3-itemp2+1<<'\t'<<cvs_covered_sub<<'\t'<<setprecision(6)<<((double)cvg_total/cvs_covered_sub)<<endl;
830 CDS_DETAIL<<ss.str()<<endl;
834 cout<<"Coverage of specific regions: "<<(double)cds_covered/cds_total*100<<"%"<<endl;
835 OutData<<"Coverage of specific regions: "<<(double)cds_covered/cds_total*100<<"%"<<endl;
836 if(cdsplot)
837 for(register int i(cds_plot_lower) ; i<=cds_plot_upper; ++i)
838 if(cds_distribution[i])
839 CDSO<<setw(5)<<i<<setw(12)<<cds_distribution[i]<<endl;
842 void CalcPoint()
844 cout<<"Summarizing Point Coverage..."<<endl;
845 strmaps::iterator mpos;
846 ifstream CDS(pointPos.c_str());
847 if(!CDS)
849 cerr<<"Error opening point input file: "<<pointPos<<endl;
850 exit(EXIT_FAILURE);
852 ofstream CDSO(pointOut.c_str());
853 if(!CDSO)
855 cerr<<"Error opening point output file: "<<pointOut<<endl;
856 exit(EXIT_FAILURE);
859 string stemp;
860 string stemp1,stemp2,name;
861 for(;;)
863 CDS>>stemp1>>stemp2;
864 if(!CDS) break;
865 int j(0);
866 j = atoi(stemp2.c_str());
867 mpos = chrmaps.find(stemp1);
868 if(mpos == chrmaps.end() || j < 1 || j>= vector_vs[mpos->second].size())
870 CDSO << stemp1 << "\t" << stemp2 << "\t-1\n";
871 continue;
873 CDSO << stemp1 << "\t" << stemp2 << "\t" << vector_vs[mpos->second][j] << "\n";
878 void AddCvg()
880 strmaps::iterator mpos;
881 ifstream CVG(cPos.c_str());
882 if(!CVG)
884 cerr<<"Error opening Coverage file: "<<cPos<<endl;
885 exit(EXIT_FAILURE);
887 //Judge what kind of file (text or binary)
888 char firstChar(0);
889 CVG.read(&firstChar, 1);
890 if(firstChar == '>')
892 CVG.seekg(0);
893 progress_display ptcount(chrmaps.size(), cout, "Importing Coverage (Text)...\n");
894 string cdstemp;
895 for(register unsigned long i(1);;++i)
897 CVG>>cdstemp;
898 if(!CVG) break;
899 if(cdstemp[0] == '>')
901 ++ptcount;
902 i = 0;
903 cdstemp = cdstemp.substr(1, string::npos);
904 mpos = chrmaps.find(cdstemp);
905 if(mpos == chrmaps.end())
907 cerr<<"Sequence "<<cdstemp<<" missing!"<<endl;
908 cerr<<"Wrong input depth file or reference file?"<<endl;
909 exit(EXIT_FAILURE);
911 continue;
913 else
914 vector_vs[mpos->second][i] = atoi(cdstemp.c_str());
917 else
919 CVG.close();
920 CVG.clear();
921 CVG.open(cPos.c_str(), ios_base::in | ios_base::binary);
922 ulong chr_quantity(0);
923 ulong len_tmp(0);
924 CVG.read((char*)(&chr_quantity), sizeof(ulong));
925 if(chr_quantity != chrmaps.size())
927 cerr<<"Segment quantity in reference file is different from depthinput file!"<<endl
928 <<" Reference: "<<chrmaps.size()<<endl
929 <<"Depthinput: "<<chr_quantity<<endl;
930 exit(EXIT_FAILURE);
932 progress_display ptcount(chrmaps.size(), cout, "Importing Coverage (Binary)...\n");
933 string cdstemp;
934 for(int x(0); x < chr_quantity; ++x,++ptcount)
936 CVG.read((char*)(&len_tmp), sizeof(ulong));
937 char* name_tmp = new char[len_tmp+1];
938 name_tmp[len_tmp] = '\0';
939 CVG.read(name_tmp, len_tmp);
940 cdstemp = name_tmp;
941 delete[] name_tmp;
942 mpos = chrmaps.find(cdstemp);
943 if(mpos == chrmaps.end())
945 cerr<<"Sequence "<<cdstemp<<" missing!"<<endl;
946 cerr<<"Wrong input depth file or reference file?"<<endl;
947 exit(EXIT_FAILURE);
949 CVG.read((char*)(&len_tmp), sizeof(ulong));
950 ++len_tmp;
951 for(register int i(1); i < len_tmp; ++i)
952 CVG.read((char*)(&vector_vs[mpos->second][i]), sizeof(ushort));
957 void AddCvgFromSamtools()
959 strmaps::iterator mpos;
960 igzstream CVG(cPos.c_str());
961 if(!CVG)
963 cerr<<"Error opening Samtools Coverage file: "<<cPos<<endl;
964 exit(EXIT_FAILURE);
966 //Judge what kind of file (text or binary)
967 progress_display ptcount(chrmaps.size(), cout, "Importing Coverage (Samtools)...\n");
968 string prevChr;
969 string chr; size_t pos, depth;
970 while(true)
972 CVG >> chr >> pos >> depth;
973 if(!CVG) break;
974 if(chr != prevChr)
976 prevChr = chr;
977 ++ptcount;
978 mpos = chrmaps.find(chr);
979 if(mpos == chrmaps.end())
981 cerr<<"Sequence "<<chr<<" missing!"<<endl;
982 cerr<<"Wrong input depth file or reference file?"<<endl;
983 exit(EXIT_FAILURE);
986 if(depth >= 65534)
987 depth = 65534;
988 vector_vs[mpos->second][pos] = depth;
992 void AddN()
994 ifstream N(nPos.c_str());
995 if(!N)
997 cerr<<"Error opening Gap file: "<<nPos<<endl;
998 exit(EXIT_FAILURE);
1000 cerr<<"Masking N gaps..."<<endl;
1001 string stemp1; int itemp2(0), itemp3(0);
1002 strmaps::iterator mpos;
1003 for(;;)
1005 N>>stemp1>>itemp2>>itemp3;
1006 if(!N) break;
1007 mpos = chrmaps.find(stemp1);
1008 if(mpos == chrmaps.end())
1010 cerr<<"Error founding "<<stemp1<<endl;
1011 continue;
1013 for(register uint j(itemp2); j < itemp3; ++j)
1015 --totalBP;
1016 vector_vs[mpos->second][j]=65535;
1021 void AddCover()
1023 cout<<"Summarizing Coverage..."<<endl;
1024 CThreadManager threadMgr(NUM_THREADS);
1025 threadMgr.SetTimer(0.5);
1026 vector<param_struct> vparam(iPos.size());
1027 for(register uint i(0); i<iPos.size(); ++i)
1029 vparam[i].pos = iPos[i];
1030 if(plain)
1031 threadMgr.AddThread( _AddCover_plain, (void*)&vparam[i]);
1032 else if(general)
1033 threadMgr.AddThread( _AddCover_general, (void*)&vparam[i]);
1034 else if(sam)
1035 threadMgr.AddThread( _AddCover_sam, (void*)&vparam[i]);
1036 else if(pslsub)
1037 threadMgr.AddThread( _AddCover_pslsub, (void*)&vparam[i]);
1038 else if(pslquery)
1039 threadMgr.AddThread( _AddCover_pslquery, (void*)&vparam[i]);
1040 else if(maq)
1041 threadMgr.AddThread( _AddCover_maq, (void*)&vparam[i]);
1042 else if(m8subject)
1043 threadMgr.AddThread( _AddCover_m8subject, (void*)&vparam[i]);
1044 else if(m8query)
1045 threadMgr.AddThread( _AddCover_m8query, (void*)&vparam[i]);
1046 else if(mummerquery)
1047 threadMgr.AddThread( _AddCover_mummerquery, (void*)&vparam[i]);
1048 else if(axtoitg)
1049 threadMgr.AddThread( _AddCover_axtoitg, (void*)&vparam[i]);
1050 else if(axtoiq)
1051 threadMgr.AddThread( _AddCover_axtoiq, (void*)&vparam[i]);
1052 else if(qmap)
1053 threadMgr.AddThread( _AddCover_qmap, (void*)&vparam[i]);
1054 else
1055 threadMgr.AddThread( _AddCover_Sub, (void*)&vparam[i]);
1057 threadMgr.Run();
1060 void* _AddCover_plain(void* param)
1062 string ctemp, ctemp2;
1063 string temp;
1064 strmaps::iterator mpos;
1065 semaphore_maps::iterator semapos;
1066 param_struct* pParam = (param_struct*) param;
1067 static ushort progress(1);
1068 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos<<endl;
1069 imethod InData(pParam->pos.c_str());
1070 while(InData)
1072 unsigned long end(0),start(0);
1073 for(;InData;)
1075 InData>>temp>>ctemp2>>ctemp;
1076 mpos = chrmaps.find(temp); semapos = chr_sema.find(temp);
1077 if(mpos == chrmaps.end())
1078 continue;
1079 start = atol(ctemp2.c_str());
1080 end = atol(ctemp.c_str());
1081 if(end < start)
1082 swap(start, end);
1083 if(tag)
1084 end = start;
1085 break;
1087 if(!InData) break;
1088 pthread_mutex_lock(semapos->second);
1089 if(end > vector_vs[mpos->second].size())
1090 end = vector_vs[mpos->second].size();
1091 for(; start < end ; ++start)
1093 if(vector_vs[mpos->second][start]<65534)
1094 ++(vector_vs[mpos->second][start]);
1096 pthread_mutex_unlock(semapos->second);
1100 void* _AddCover_general(void* param)
1102 uint rname(idCol - 1), pos(startCol - 1), seq(endCol - 1);
1103 uint maxCol = max(rname, pos);
1104 maxCol = max(maxCol, seq);
1105 string str;
1106 vector<string> vec_str;
1107 strmaps::iterator mpos;
1108 semaphore_maps::iterator semapos;
1109 param_struct* pParam = (param_struct*) param;
1110 static ushort progress(1);
1111 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos<<endl;
1112 imethod InData(pParam->pos.c_str());
1113 while(InData)
1115 uint end(0),start(0);
1116 for(;InData;)
1118 getline(InData, str);
1119 if(!InData)
1120 break;
1121 if(!str.empty() && str[0]=='#')
1122 continue;
1123 vec_str.clear();
1124 regex_split(back_inserter(vec_str), str);
1125 if(vec_str.size() <= maxCol)
1126 continue;
1127 mpos = chrmaps.find(vec_str[rname]); semapos = chr_sema.find(vec_str[rname]);
1128 if(mpos == chrmaps.end())
1129 continue;
1130 start = atoi(vec_str[pos].c_str());
1131 (generalLen) ? (end = start + atoi(vec_str[seq].c_str())) : (end = atoi(vec_str[seq].c_str()));
1132 if(tag)
1133 end = start;
1134 break;
1136 if(!InData) break;
1137 pthread_mutex_lock(semapos->second);
1138 if(end > vector_vs[mpos->second].size())
1139 end = vector_vs[mpos->second].size();
1140 for(; start < end ; ++start)
1142 if(vector_vs[mpos->second][start]<65534)
1143 ++(vector_vs[mpos->second][start]);
1145 pthread_mutex_unlock(semapos->second);
1150 #define SINGLE_UNMAPED 0x0004
1151 #define PAIR_UNMAPED 0x0008
1153 void* _AddCover_sam(void* param)
1155 enum str_col{qname=0, flag, rname, pos, mapq, cigar, mrnm, mpos2, isize, seq, qual, tagmark, vtype};
1156 string str;
1157 vector<string> vec_str;
1158 strmaps::iterator mpos;
1159 semaphore_maps::iterator semapos;
1160 param_struct* pParam = (param_struct*) param;
1161 static ushort progress(1);
1162 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos<<endl;
1163 imethod InData(pParam->pos.c_str());
1164 while(InData)
1166 uint end(0),start(0);
1167 for(;InData;)
1169 getline(InData, str);
1170 if(!InData)
1171 break;
1172 if(!str.empty() && str[0]=='@')
1173 continue;
1174 vec_str.clear();
1175 regex_split(back_inserter(vec_str), str);
1176 mpos = chrmaps.find(vec_str[rname]); semapos = chr_sema.find(vec_str[rname]);
1177 if(atoi(vec_str[flag].c_str()) & (SINGLE_UNMAPED ))//| PAIR_UNMAPED))
1178 continue;
1179 if(mpos == chrmaps.end())
1180 continue;
1181 start = atoi(vec_str[pos].c_str());
1182 end = start + vec_str[seq].size() - 1;
1183 if(tag)
1184 end = start;
1185 break;
1187 if(!InData) break;
1188 pthread_mutex_lock(semapos->second);
1189 if(end > vector_vs[mpos->second].size())
1190 end = vector_vs[mpos->second].size();
1191 for(; start < end ; ++start)
1193 if(vector_vs[mpos->second][start]<65534)
1194 ++(vector_vs[mpos->second][start]);
1196 pthread_mutex_unlock(semapos->second);
1200 void* _AddCover_Sub(void* param)
1202 string ctemp, ctemp2, uniqmark, mismatchmark;
1203 string temp;
1204 strmaps::iterator mpos;
1205 semaphore_maps::iterator semapos;
1206 param_struct* pParam = (param_struct*) param;
1207 static ushort progress(1);
1208 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos<<endl;
1210 imethod InData(pParam->pos.c_str());
1212 vpsu * sp_sui = &sp_sui_single;
1213 if(sp)
1215 for(register size_t i(0); i < isPos.size(); ++i)
1216 if(isPos[i] == pParam->pos)
1217 sp_sui = &sp_sui_single;
1218 for(register size_t i(0); i < ipPos.size(); ++i)
1219 if(ipPos[i] == pParam->pos)
1220 sp_sui = &sp_sui_soap;
1221 if(sp_sui == NULL)
1222 cerr<<(pParam->pos)<<" not found in both il_single and il_soap!"<<endl;
1225 vector<uint> mismatch_bypass_vector;
1226 while(true)
1228 uint length(0),start(0), mismatch(0);
1229 for(;;)
1231 //4 or uniqmark
1232 InData>>ctemp;InData>>ctemp;InData>>ctemp;InData>>uniqmark;
1233 if(!InData) break;
1234 if(onlyuniq && uniqmark!="1")
1236 InData.ignore(100000,'\n');
1237 continue;
1239 //6 for length
1240 InData>>ctemp;InData>>ctemp2;
1241 //8 for chr
1242 InData>>ctemp; InData>>temp;
1243 mpos = chrmaps.find(temp); semapos = chr_sema.find(temp);
1244 if(mpos == chrmaps.end())
1246 InData.ignore(100000,'\n');
1247 if(!nowarning)
1248 cerr<<"Omitted one row cuz \""<<temp<<"\" not found!"<<endl;
1249 continue;
1251 //9 for start
1252 InData>>ctemp;
1253 if(ctemp.size() > 9)
1255 InData.ignore(100000,'\n');
1256 if(!nowarning)
1257 cerr<<"Omitted one row cuz position\""<<ctemp<<"\" make no sense!"<<endl;
1258 continue; //Validation
1260 //10 for mismatchmark
1261 InData>>mismatchmark;
1262 start = atoi(ctemp.c_str());
1263 length = atoi(ctemp2.c_str());
1264 //bool continue_match(false);
1265 if(precise)
1267 mismatch = atoi(mismatchmark.c_str());
1268 mismatch_bypass_vector.clear();
1269 if((mismatch == 1) || (mismatch == 2))
1271 string mismatchpos;
1272 getline(InData, mismatchpos, '\n');
1273 vector<string> vec_mismatchpos;
1274 regex_split(back_inserter(vec_mismatchpos), mismatchpos);
1275 //mismatchpos = vec_mismatchpos[vec_mismatchpos.size()-1];
1277 mismatch = SeperateCigar(vec_mismatchpos[vec_mismatchpos.size()-1], mismatch_bypass_vector);
1278 if(!mismatch)
1280 if(!nowarning)
1281 cerr<<"Regex match error on string: "<<endl
1282 <<temp<<'\t'<<start<<'\t'<<length<<'\t'<<mismatchpos<<endl;
1283 mismatch_bypass_vector.clear();
1284 break;
1287 else
1288 InData.ignore(100000,'\n');
1290 else
1291 InData.ignore(100000,'\n');
1292 break;
1294 if(!InData) break;
1295 if(!length)
1297 if(!nowarning)
1298 cerr<<"Omitted one row cuz length equals to 0!"<<endl;
1299 continue; //Validation
1301 if(tag)
1302 length = 1;
1303 pthread_mutex_lock(semapos->second);
1305 uint j(start+length);
1306 if(j > vector_vs[mpos->second].size())
1307 j = vector_vs[mpos->second].size();
1308 for(uint loopcount(0); start<j ; ++start,++loopcount)
1310 if(precise)
1311 for(uint checkmismatch_loopcount(0); checkmismatch_loopcount < mismatch_bypass_vector.size(); ++checkmismatch_loopcount)
1312 if(mismatch_bypass_vector[checkmismatch_loopcount] == loopcount)
1313 continue;
1314 if(vector_vs[mpos->second][start]<65534)
1315 ++(vector_vs[mpos->second][start]);
1318 if(sp)
1320 uint end(start+length-1);
1321 if(end >= sp_vvs[mpos->second].size())
1322 end = sp_vvs[mpos->second].size() - 1;
1323 //if(sp_vvs[mpos->second][start] >= (*sp_sui).size() || sp_vvs[mpos->second][end] >= (*sp_sui).size())
1325 //cerr<<sp_vvs[mpos->second][start]<<'\t'<<sp_vvs[mpos->second][end]<<'\t'<<((*sp_sui).size())<<endl;
1326 // continue;
1328 if(sp_vvs[mpos->second][start] == 0 && sp_vvs[mpos->second][end] == 0)
1329 continue;
1330 else if(sp_vvs[mpos->second][start] != 0 && sp_vvs[mpos->second][end] == 0)
1331 ++((*sp_sui)[sp_vvs[mpos->second][start]]).second;
1332 else if(sp_vvs[mpos->second][start] == 0 && sp_vvs[mpos->second][end] != 0)
1333 ++((*sp_sui)[sp_vvs[mpos->second][end]]).second;
1334 else if(sp_vvs[mpos->second][start] == sp_vvs[mpos->second][end] && sp_vvs[mpos->second][start] != 0)
1335 ++((*sp_sui)[sp_vvs[mpos->second][start]]).second;
1336 else if(sp_vvs[mpos->second][start] != sp_vvs[mpos->second][end] && sp_vvs[mpos->second][start] != 0 && sp_vvs[mpos->second][end] != 0)
1338 ++((*sp_sui)[sp_vvs[mpos->second][start]]).second;
1339 ++((*sp_sui)[sp_vvs[mpos->second][end]]).second;
1341 else
1343 cerr<<sp_vvs[mpos->second][start]<<'\t'<<sp_vvs[mpos->second][end]<<" Exceptional S/P ratio condition!"<<endl;
1346 pthread_mutex_unlock(semapos->second);
1350 void* _AddCover_pslsub(void* param)
1352 string ctemp, ctemp2;
1353 string length, substart;
1354 vector<string> length_vec, substart_vec;
1355 regex e(",");
1356 string temp;
1357 strmaps::iterator mpos;
1358 semaphore_maps::iterator semapos;
1359 param_struct* pParam = (param_struct*) param;
1360 static ushort progress(1);
1361 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos<<endl;
1362 imethod InData(pParam->pos.c_str());
1363 while(InData)
1365 uint end(0),start(0);
1366 for(;InData;)
1368 //Omitting data
1369 InData>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp;
1370 //14 for chr
1371 InData>>temp;
1372 mpos = chrmaps.find(temp); semapos = chr_sema.find(temp);
1373 if(mpos == chrmaps.end())
1375 InData.ignore(100000,'\n');
1376 continue;
1378 InData>>ctemp>>ctemp>>ctemp>>ctemp>>length;
1379 InData>>ctemp>>substart;
1380 length_vec.clear();
1381 substart_vec.clear();
1382 regex_split(back_inserter(length_vec), length, e);
1383 regex_split(back_inserter(substart_vec), substart, e);
1384 break;
1386 if(!InData) break;
1387 pthread_mutex_lock(semapos->second);
1388 for(uint vec_it(0); vec_it < length_vec.size(); ++vec_it)
1390 start = atoi(substart_vec[vec_it].c_str());
1391 end = start + atoi(length_vec[vec_it].c_str());
1392 if(end >= vector_vs[mpos->second].size())
1393 end = vector_vs[mpos->second].size();
1394 for(; start < end ; ++start)
1396 if(vector_vs[mpos->second][start]<65534)
1397 ++(vector_vs[mpos->second][start]);
1400 pthread_mutex_unlock(semapos->second);
1404 void* _AddCover_pslquery(void* param)
1406 string ctemp, ctemp2;
1407 string length, substart;
1408 vector<string> length_vec, substart_vec;
1409 regex e(",");
1410 string temp;
1411 strmaps::iterator mpos;
1412 semaphore_maps::iterator semapos;
1413 param_struct* pParam = (param_struct*) param;
1414 static ushort progress(1);
1415 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos<<endl;
1416 imethod InData(pParam->pos.c_str());
1417 while(InData)
1419 uint end(0),start(0);
1420 for(;InData;)
1422 //Omitting data
1423 InData>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp;
1424 //14 for chr
1425 InData>>temp;
1426 mpos = chrmaps.find(temp); semapos = chr_sema.find(temp);
1427 if(mpos == chrmaps.end())
1429 InData.ignore(100000,'\n');
1430 continue;
1432 InData>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>length;
1433 InData>>substart>>ctemp;
1434 length_vec.clear();
1435 substart_vec.clear();
1436 regex_split(back_inserter(length_vec), length, e);
1437 regex_split(back_inserter(substart_vec), substart, e);
1438 break;
1440 if(!InData) break;
1441 pthread_mutex_lock(semapos->second);
1442 for(uint vec_it(0); vec_it < length_vec.size(); ++vec_it)
1444 start = atoi(substart_vec[vec_it].c_str());
1445 end = start + atoi(length_vec[vec_it].c_str());
1446 if(end >= vector_vs[mpos->second].size())
1447 end = vector_vs[mpos->second].size();
1448 for(; start < end ; ++start)
1450 if(vector_vs[mpos->second][start]<65534)
1451 ++(vector_vs[mpos->second][start]);
1454 pthread_mutex_unlock(semapos->second);
1458 void* _AddCover_maq(void* param)
1460 string ctemp, ctemp2;
1461 string temp;
1462 strmaps::iterator mpos;
1463 semaphore_maps::iterator semapos;
1464 param_struct* pParam = (param_struct*) param;
1465 static ushort progress(1);
1466 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos<<endl;
1467 imethod InData(pParam->pos.c_str());
1468 while(InData)
1470 uint end(0),start(0);
1471 for(;InData;)
1473 //Omitting data
1474 InData>>ctemp;
1475 //2 for chr
1476 InData>>temp;
1477 mpos = chrmaps.find(temp); semapos = chr_sema.find(temp);
1478 if(mpos == chrmaps.end())
1480 InData.ignore(100000,'\n');
1481 continue;
1483 //3 for start
1484 InData>>ctemp2;
1485 //14 for end
1486 InData>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp;
1487 start = atoi(ctemp2.c_str());
1488 end = atoi(ctemp.c_str()) + start - 1;
1489 InData.ignore(100000,'\n');
1490 if(tag)
1491 end = start;
1492 break;
1494 if(!InData) break;
1495 pthread_mutex_lock(semapos->second);
1496 for(register uint limit(vector_vs[mpos->second].size()); start <= end ; ++start)
1498 if(start >= limit) break;
1499 if(vector_vs[mpos->second][start]<65534)
1500 ++(vector_vs[mpos->second][start]);
1502 pthread_mutex_unlock(semapos->second);
1506 void* _AddCover_m8subject(void* param)
1508 string ctemp, ctemp2;
1509 string temp;
1510 strmaps::iterator mpos;
1511 semaphore_maps::iterator semapos;
1512 param_struct* pParam = (param_struct*) param;
1513 static ushort progress(1);
1514 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos<<endl;
1515 imethod InData(pParam->pos.c_str());
1516 while(InData)
1518 uint end(0),start(0);
1519 for(;InData;)
1521 //Omitting data
1522 InData>>ctemp;
1523 //2 for chr
1524 InData>>temp;
1525 mpos = chrmaps.find(temp); semapos = chr_sema.find(temp);
1526 if(mpos == chrmaps.end())
1528 InData.ignore(100000,'\n');
1529 continue;
1531 //9 for start
1532 InData>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp2;
1533 //10 for end
1534 InData>>ctemp;
1535 start = atoi(ctemp2.c_str());
1536 end = atoi(ctemp.c_str());
1537 if(end < start)
1538 swap(start, end);
1539 InData.ignore(100000,'\n');
1540 break;
1542 if(!InData) break;
1543 pthread_mutex_lock(semapos->second);
1544 for(register uint limit(vector_vs[mpos->second].size()); start <= end ; ++start)
1546 if(start >= limit) break;
1547 if(vector_vs[mpos->second][start]<65534)
1548 ++(vector_vs[mpos->second][start]);
1550 pthread_mutex_unlock(semapos->second);
1554 void* _AddCover_m8query(void* param)
1556 string ctemp, ctemp2;
1557 string temp;
1558 strmaps::iterator mpos;
1559 semaphore_maps::iterator semapos;
1560 param_struct* pParam = (param_struct*) param;
1561 static ushort progress(1);
1562 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos<<endl;
1563 imethod InData(pParam->pos.c_str());
1564 while(InData)
1566 uint end(0),start(0);
1567 for(;InData;)
1569 //1 for chr
1570 InData>>temp;
1571 mpos = chrmaps.find(temp); semapos = chr_sema.find(temp);
1572 if(mpos == chrmaps.end())
1574 InData.ignore(100000,'\n');
1575 continue;
1577 //7 for start
1578 InData>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp>>ctemp2;
1579 //8 for end
1580 InData>>ctemp;
1581 start = atoi(ctemp2.c_str());
1582 end = atoi(ctemp.c_str());
1583 if(start > end)
1584 swap(start, end);
1585 InData.ignore(100000,'\n');
1586 break;
1588 if(!InData) break;
1589 pthread_mutex_lock(semapos->second);
1590 for(register uint limit(vector_vs[mpos->second].size()); start <= end ; ++start)
1592 if(start >= limit) break;
1593 if(vector_vs[mpos->second][start]<65534)
1594 ++(vector_vs[mpos->second][start]);
1596 pthread_mutex_unlock(semapos->second);
1600 void* _AddCover_mummerquery(void* param)
1602 string ctemp, ctemp2;
1603 string temp(">");
1604 strmaps::iterator mpos;
1605 semaphore_maps::iterator semapos;
1606 param_struct* pParam = (param_struct*) param;
1607 static ushort progress(1);
1608 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos<<endl;
1609 imethod InData(pParam->pos.c_str());
1611 bool four_column(false);
1612 //mummer format judgement
1613 for(;;)
1615 while(temp.find(">") != string::npos)
1616 getline(InData, temp);
1617 stringstream ss(temp);
1618 int i(0);
1619 for(;;++i)
1621 ss>>temp;
1622 if(!ss)
1623 break;
1625 if(i == 4)
1626 four_column = true;
1627 InData.seekg(0);
1628 break;
1631 //Data processing
1632 while(InData)
1634 uint end(0),start(0);
1635 bool reverse_mark(false);
1636 bool read_signal(true);
1637 string name;
1638 for(;;)
1640 //For special mark or ref or ref position
1641 if(read_signal)
1642 InData>>temp;
1643 else
1644 read_signal = true;
1646 if(!InData) break;
1647 if(!temp.empty() && temp[0] == '>')
1649 reverse_mark = false;
1650 InData>>temp;
1651 name = temp;
1652 mpos = chrmaps.find(name); semapos = chr_sema.find(name);
1653 if(mpos == chrmaps.end())
1655 cerr<<"Fatal error! Name "<<name<<" in mummer result file missing in reference, input file bypassed!"<<endl;
1656 return (void*)(NULL);
1658 InData>>temp;
1659 if(temp.find("Reverse") != string::npos)
1661 reverse_mark = true;
1662 InData>>temp;
1664 if(temp[0] == '>')
1666 read_signal = false;
1667 continue;
1671 if(four_column)
1673 //For query position
1674 InData>>ctemp;
1675 InData>>ctemp;
1677 else
1678 //For query position
1679 InData>>ctemp;
1681 //For length
1682 InData>>ctemp2;
1684 if(atoi(ctemp2.c_str()) < mummer_limit)
1685 continue;
1686 /*if(atoi(ctemp2.c_str()) > 100000)
1688 cerr<<"Record "<<ctemp<<'\t'<<ctemp2<<" > 100000"<<"! Omited."<<endl;
1689 continue;
1692 if(reverse_mark)
1694 end = vector_vs[mpos->second].size() - atoi(ctemp.c_str());
1695 start = end - atoi(ctemp2.c_str());
1697 else
1699 start = atoi(ctemp.c_str());
1700 end = start + atoi(ctemp2.c_str());
1702 break;
1705 if(tag)
1706 end = start + 1;
1708 if(!InData) break;
1709 pthread_mutex_lock(semapos->second);
1710 for(register uint limit(vector_vs[mpos->second].size()); start < end ; ++start)
1712 if(start >= limit) break;
1713 if(vector_vs[mpos->second][start]<65534)
1714 ++(vector_vs[mpos->second][start]);
1716 pthread_mutex_unlock(semapos->second);
1720 void* _AddCover_qmap(void* param)
1722 string temp(">"), strTemp;
1723 strmaps::iterator mpos;
1724 semaphore_maps::iterator semapos;
1725 param_struct* pParam = (param_struct*) param;
1726 static ushort progress(1);
1727 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos<<endl;
1728 imethod InData(pParam->pos.c_str());
1729 ulong position(0);
1730 uint met(0), uMet(0);
1731 bool locked(false);
1733 //Data processing
1734 while(InData)
1736 getline(InData, temp, '\n');
1737 if(!InData)
1738 break;
1739 if(!temp.empty() && temp[0] == '>')
1741 if(locked)
1742 pthread_mutex_unlock(semapos->second);
1743 locked = false;
1744 position = 0;
1745 string name(temp.substr(1, temp.find_first_of(" \t") - 1));
1746 mpos = chrmaps.find(name); semapos = chr_sema.find(name);
1747 if(mpos == chrmaps.end())
1749 cerr<<"Fatal error! Name "<<name<<" in qmap missing in reference, input file bypassed!"<<endl;
1750 return (void*)(NULL);
1752 pthread_mutex_lock(semapos->second);
1753 locked = true;
1755 else
1757 stringstream ss(temp);
1758 ss>>strTemp>>met>>uMet;
1759 vector_vs[mpos->second][position] += (met + uMet);
1764 void* _AddCover_axtoitg(void* param)
1766 string ctemp, ctemp2;
1767 string temp;
1768 strmaps::iterator mpos;
1769 semaphore_maps::iterator semapos;
1770 param_struct* pParam = (param_struct*) param;
1771 static ushort progress(1);
1772 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos<<endl;
1773 imethod InData(pParam->pos.c_str());
1774 while(InData)
1776 uint end(0),start(0);
1777 for(;InData;)
1779 //2 for chr
1780 InData>>temp;
1781 if(InData && (temp.empty() || isalpha(temp[0])))
1782 continue;
1783 InData>>temp;
1784 mpos = chrmaps.find(temp); semapos = chr_sema.find(temp);
1785 if(mpos == chrmaps.end())
1787 InData.ignore(100000,'\n');
1788 continue;
1790 //3 for start
1791 InData>>ctemp2;
1792 //4 for end
1793 InData>>ctemp;
1794 start = atoi(ctemp2.c_str());
1795 end = atoi(ctemp.c_str());
1796 InData.ignore(100000,'\n');
1797 break;
1799 if(!InData) break;
1800 pthread_mutex_lock(semapos->second);
1801 for(register uint limit(vector_vs[mpos->second].size()); start <= end ; ++start)
1803 if(start >= limit) break;
1804 if(vector_vs[mpos->second][start]<65534)
1805 ++(vector_vs[mpos->second][start]);
1807 pthread_mutex_unlock(semapos->second);
1811 void* _AddCover_axtoiq(void* param)
1813 string ctemp, ctemp2;
1814 string temp;
1815 string strand;
1816 strmaps::iterator mpos;
1817 semaphore_maps::iterator semapos;
1818 param_struct* pParam = (param_struct*) param;
1819 static ushort progress(1);
1820 cout<<progress++<<'/'<<iPos.size()<<": "<<pParam->pos<<endl;
1821 imethod InData(pParam->pos.c_str());
1822 while(InData)
1824 uint end(0),start(0);
1825 for(;InData;)
1827 //5 for chr
1828 InData>>temp;
1829 if(InData && (temp.empty() || isalpha(temp[0])))
1830 continue;
1831 InData>>temp>>temp>>temp>>temp;
1832 mpos = chrmaps.find(temp); semapos = chr_sema.find(temp);
1833 if(mpos == chrmaps.end())
1835 InData.ignore(100000,'\n');
1836 continue;
1838 //6 for start
1839 InData>>ctemp2;
1840 //7 for end
1841 InData>>ctemp;
1842 //8 for strand
1843 InData>>strand;
1844 if(!strand.empty() && strand[0]=='-')
1846 start = vector_vs[mpos->second].size() - atoi(ctemp.c_str());
1847 end = vector_vs[mpos->second].size() - atoi(ctemp2.c_str());
1849 else
1851 start = atoi(ctemp2.c_str());
1852 end = atoi(ctemp.c_str());
1854 InData.ignore(100000,'\n');
1855 break;
1857 if(!InData) break;
1858 pthread_mutex_lock(semapos->second);
1859 for(register uint limit(vector_vs[mpos->second].size()); start <= end ; ++start)
1861 if(start >= limit) break;
1862 if(vector_vs[mpos->second][start]<65534)
1863 ++(vector_vs[mpos->second][start]);
1865 pthread_mutex_unlock(semapos->second);
1869 ulong AddTotalBP()
1871 ulong singleBP(0);
1872 string ctemp;
1873 ifstream refData(temp.c_str());
1874 if(!refData)
1876 cerr<<"Error while opening: "<<temp<<endl;
1877 exit(EXIT_FAILURE);
1879 getline(refData,ctemp); //Abandon the first information line
1880 while(refData>>ctemp) singleBP+=ctemp.size();
1881 totalBP += (singleBP - trim * 2);
1882 return (singleBP+1);
1885 ulong AddTotalBP_refsingle()
1887 return
1888 refsingle_marker[temp];
1891 ulong AddTotalBP_fastat()
1893 return
1894 refsingle_marker[temp];
1897 void BuildMemory()
1899 progress_display ptcount(chr.size(), cout, "Building Memory Blocks...\n");
1900 strsets::iterator pos;
1901 for(pos=chr.begin(); pos!=chr.end(); ++pos)
1903 ++ptcount;
1904 ulong singleBP(0);
1905 if(refsingle)
1907 temp = *pos;
1908 singleBP=AddTotalBP_refsingle();
1910 else if(reffastat)
1912 temp = *pos;
1913 singleBP=AddTotalBP_fastat();
1915 else if(::ref)
1917 temp = rPos + '/' + *pos + ".fa";
1918 singleBP=AddTotalBP();
1921 vector_vs.push_back(vs(singleBP,0));
1922 if(sp)
1923 sp_vvs.push_back(vi(singleBP, 0));
1927 void MakeMaps()
1929 cout<<"Shadowing Map..."<<endl;
1930 strsets::iterator pos;
1931 for(pos=chr.begin(); pos!=chr.end(); ++pos)
1933 static int i(0);
1934 chrmaps.insert(make_pair(*pos,i));
1935 chr_sema.insert(make_pair(*pos, new pthread_mutex_t));
1936 if(pthread_mutex_init(chr_sema[*pos], NULL))
1938 cerr<<"Mutex on "<<*pos<<" initialization error!"<<endl;
1939 exit(EXIT_FAILURE);
1941 i++;
1943 cerr<<"Mutex Lock created: "<<chr_sema.size()<<endl;
1946 void ParaDeal(int & argc,char** argv)
1948 string temp;
1949 char ctemp[10000];
1950 if(argc == 1)
1952 cerr<<USAGE;
1953 exit(EXIT_SUCCESS);
1955 cout<<endl<<"Parameters List:";
1956 //Parameters Deals
1957 for(register int i(1),next(0); i<argc; i++)
1959 int rightPara(0);
1960 if (next)
1962 cout<<argv[i]<<' ';
1963 --next;
1964 continue;
1966 if (strcmp(argv[i],"-h") == 0 || strcmp(argv[i],"-help") == 0)
1968 cerr<<USAGE;
1969 exit(EXIT_SUCCESS);
1971 if (strcmp(argv[i], "-precisionOffset") == 0 || strcmp(argv[i], "-po") == 0)
1973 ComfirmAllDigit(argv[i+1]);
1974 precisionOffset = atoi(argv[i+1]);
1975 rightPara++;
1976 next++;
1978 if (strcmp(argv[i], "-minimumDepth") == 0 || strcmp(argv[i], "-mind") == 0)
1980 ComfirmAllDigit(argv[i+1]);
1981 minimumDepth = atoi(argv[i+1]);
1982 rightPara++;
1983 next++;
1985 if (strcmp(argv[i], "-maximumDepth") == 0 || strcmp(argv[i], "-maxd") == 0)
1987 ComfirmAllDigit(argv[i+1]);
1988 maximumDepth = atoi(argv[i+1]);
1989 rightPara++;
1990 next++;
1992 if (strcmp(argv[i],"-nowarning") == 0 || strcmp(argv[i],"-nw") == 0)
1994 nowarning = true;
1995 rightPara++;
1997 if (strcmp(argv[i],"-onlyuniq") == 0 || strcmp(argv[i],"-ou") == 0)
1999 onlyuniq = true;
2000 rightPara++;
2002 if (strcmp(argv[i],"-precise") == 0)
2004 precise = true;
2005 rightPara++;
2007 if (strcmp(argv[i],"-plain") == 0)
2009 plain = true;
2010 rightPara++;
2012 if (strcmp(argv[i],"-sam") == 0)
2014 sam = true;
2015 rightPara++;
2017 if (strcmp(argv[i],"-qmap") == 0)
2019 qmap = true;
2020 rightPara++;
2022 if (strcmp(argv[i],"-pslquery") == 0)
2024 pslquery = true;
2025 rightPara++;
2027 if (strcmp(argv[i],"-pslsub") == 0)
2029 pslsub = true;
2030 rightPara++;
2032 if (strcmp(argv[i],"-maq") == 0)
2034 maq = true;
2035 rightPara++;
2037 if (strcmp(argv[i],"-m8subject") == 0)
2039 m8subject = true;
2040 rightPara++;
2042 if (strcmp(argv[i],"-m8query") == 0)
2044 m8query = true;
2045 rightPara++;
2047 if (strcmp(argv[i],"-mummerquery") == 0)
2049 mummerquery = true;
2050 ComfirmAllDigit(argv[i+1]);
2051 mummer_limit = atoi(argv[i+1]);
2052 if(mummer_limit < 0) mummer_limit = 20;
2053 rightPara++;
2054 next++;
2056 if (strcmp(argv[i],"-axtoitg") == 0)
2058 axtoitg = true;
2059 rightPara++;
2061 if (strcmp(argv[i],"-axtoiq") == 0)
2063 axtoiq = true;
2064 rightPara++;
2066 if (strcmp(argv[i],"-cvg") == 0)
2068 if(phy)
2070 cerr<<"Parameter \"-cvg\" can't be used with \"-phy\""<<endl;
2071 exit(EXIT_FAILURE);
2073 cvg = true;
2074 rightPara++;
2076 if (strcmp(argv[i],"-nospace") == 0)
2078 nospace = true;
2080 if (strcmp(argv[i],"-phy") == 0)
2082 if(cvg)
2084 cerr<<"Parameter \"-phy\" can't be used with \"-cvg\""<<endl;
2085 exit(EXIT_FAILURE);
2087 phy = true;
2088 rightPara++;
2090 if (strcmp(argv[i],"-tag") == 0)
2092 if(phy)
2094 cerr<<"Parameter \"-tag\" can't be used with \"-phy\""<<endl;
2095 exit(EXIT_FAILURE);
2097 cvg = true;
2098 tag = true;
2099 rightPara++;
2101 if (strcmp(argv[i],"-nocalc") == 0 || strcmp(argv[i],"-nc") == 0)
2103 nocalc = true;
2104 rightPara++;
2106 if (strcmp(argv[i],"-onlycover") == 0 || strcmp(argv[i],"-oc") == 0)
2108 onlycover = true;
2109 rightPara++;
2111 if (strcmp(argv[i],"-p") == 0)
2113 ComfirmAllDigit(argv[i+1]);
2114 NUM_THREADS = atoi(argv[i+1]);
2115 rightPara++;
2116 next++;
2118 if (strcmp(argv[i],"-plot") == 0)
2120 plot = true;
2121 if(i + 3 >= argc)
2123 cerr<<"\n-plot parameter format error!\n";
2124 exit(1);
2126 plotPos = argv[i+1];
2127 ComfirmAllDigit(argv[i+2]);
2128 ComfirmAllDigit(argv[i+3]);
2129 plot_lower = atoi(argv[i+2]);
2130 plot_upper = atoi(argv[i+3]);
2131 if(plot_lower < 0) plot_lower = 0;
2132 if(plot_upper > 65534) plot_upper = 65534;
2133 rightPara++;
2134 next+=3;
2136 if (strcmp(argv[i],"-general") == 0 || strcmp(argv[i],"-generallen") == 0)
2138 if(strcmp(argv[i],"-general") == 0)
2139 general = true;
2140 else
2141 general = generalLen = true;
2142 if(i + 3 >= argc)
2144 cerr<<"\n-general or -generalLen parameter format error!\n";
2145 exit(1);
2147 ComfirmAllDigit(argv[i+1]);
2148 ComfirmAllDigit(argv[i+2]);
2149 ComfirmAllDigit(argv[i+3]);
2150 idCol = atoi(argv[i+1]);
2151 startCol = atoi(argv[i+2]);
2152 endCol = atoi(argv[i+3]);
2153 rightPara++;
2154 next+=3;
2156 if (strcmp(argv[i],"-duplicate") == 0)
2158 ComfirmAllDigit(argv[i+1]);
2159 duplicate = atoi(argv[i+1]);
2160 rightPara++;
2161 next++;
2163 if (strcmp(argv[i],"-insertupper") == 0 || strcmp(argv[i],"-iu") == 0)
2165 ComfirmAllDigit(argv[i+1]);
2166 insert_Limit = atoi(argv[i+1]);
2167 rightPara++;
2168 next++;
2170 if (strcmp(argv[i],"-insertlower") == 0 || strcmp(argv[i],"-ilo") == 0)
2172 ComfirmAllDigit(argv[i+1]);
2173 insert_LowerLimit = atoi(argv[i+1]);
2174 rightPara++;
2175 next++;
2177 if (strcmp(argv[i],"-addn") == 0)
2179 nPos = argv[i+1];
2180 addN=true;
2181 rightPara++;
2182 next++;
2184 if (strcmp(argv[i],"-depthinput") == 0 || strcmp(argv[i],"-di") == 0)
2186 cPos = argv[i+1];
2187 depthinput=true;
2188 rightPara++;
2189 next++;
2191 if (strcmp(argv[i],"-depthinputsam") == 0 || strcmp(argv[i],"-dis") == 0)
2193 cPos = argv[i+1];
2194 depthinputsam=true;
2195 rightPara++;
2196 next++;
2198 if (strcmp(argv[i],"-cdsinput") == 0 || strcmp(argv[i],"-ci") == 0)
2200 cdsPos = argv[i+1];
2201 cdsinput=true;
2202 rightPara++;
2203 next++;
2205 if (strcmp(argv[i],"-point") == 0)
2207 pointPos = argv[i+1];
2208 pointOut = argv[i+2];
2209 rightPara++;
2210 next+=2;
2212 if (strcmp(argv[i],"-sp") == 0)
2214 sp = true;
2215 spIn = argv[i+1];
2216 spOut = argv[i+2];
2217 rightPara++;
2218 next+=2;
2220 if (strcmp(argv[i],"-pesupport") == 0 || strcmp(argv[i],"-ps") == 0)
2222 pesupport = true;
2223 sp = true;
2224 spIn = argv[i+1];
2225 spOut = argv[i+2];
2226 rightPara++;
2227 next+=2;
2229 if (strcmp(argv[i],"-cdsplot") == 0 || strcmp(argv[i],"-cp") == 0)
2231 cdsplot = true;
2232 cdsPos_out = argv[i+1];
2233 ComfirmAllDigit(argv[i+2]);
2234 ComfirmAllDigit(argv[i+3]);
2235 cds_plot_lower = atoi(argv[i+2]);
2236 cds_plot_upper = atoi(argv[i+3]);
2237 if(cds_plot_lower < 0) cds_plot_lower = 0;
2238 if(cds_plot_upper > 65534) cds_plot_upper = 65534;
2239 rightPara++;
2240 next+=3;
2242 if (strcmp(argv[i],"-cdsdetail") == 0 || strcmp(argv[i],"-cd") == 0)
2244 cdsdetail = true;
2245 cdsdetailPos = argv[i+1];
2246 rightPara++;
2247 next++;
2249 if (strcmp(argv[i],"-window") == 0)
2251 window = true;
2252 windowPos_out = argv[i+1];
2253 ComfirmAllDigit(argv[i+2]);
2254 window_size = atoi(argv[i+2]);
2255 if(window_size < 0) window_size = 0;
2256 rightPara++;
2257 next+=2;
2259 if (strcmp(argv[i],"-trim") == 0)
2261 ComfirmAllDigit(argv[i+1]);
2262 trim = atoi(argv[i+1]);
2263 rightPara++;
2264 next++;
2266 if (strcmp(argv[i],"-depth") == 0)
2268 dPos = argv[i+1];
2269 depth=true;
2270 rightPara++;
2271 next++;
2273 if (strcmp(argv[i],"-depthsingle") == 0 || strcmp(argv[i],"-ds") == 0)
2275 dPos = argv[i+1];
2276 depthsingle=true;
2277 rightPara++;
2278 next++;
2280 if (strcmp(argv[i],"-depthsinglebin") == 0 || strcmp(argv[i],"-dsb") == 0)
2282 dPos = argv[i+1];
2283 depthsinglebin=true;
2284 rightPara++;
2285 next++;
2287 if (strcmp(argv[i],"-o") == 0)
2289 oPos = argv[i+1];
2290 rightPara++;
2291 next++;
2293 if (strcmp(argv[i],"-i") == 0)
2295 addon = true;
2296 for(register int j(1); (i+j)<argc; ++j)
2298 if (argv[i+j][0] == '-') break;
2299 igzstream InData_sub(argv[i+j]);
2300 if(!InData_sub)
2302 cerr<<"Error opening Soap file:"<<argv[i+j]<<endl;
2303 exit(EXIT_FAILURE);
2305 iPos.push_back(argv[i+j]);
2306 next++;
2308 rightPara++;
2310 if (strcmp(argv[i],"-il") == 0)
2312 addon = true;
2313 igzstream InData(argv[i+1]);
2314 if(!InData)
2316 cerr<<"Error opening Soap list-file:"<<argv[i+1]<<endl;
2317 exit(EXIT_FAILURE);
2319 for(InData>>ctemp; InData; InData>>ctemp)
2321 iPos.push_back(ctemp);
2323 for(register uint i(0); i<iPos.size(); ++i)
2325 igzstream InData_sub(iPos[i].c_str());
2326 if(!InData_sub)
2328 cerr<<"Error opening Soap file:"<<iPos[i]<<endl;
2329 exit(EXIT_FAILURE);
2332 rightPara++;
2333 next++;
2335 if (strcmp(argv[i],"-il_single") == 0)
2337 addon = true;
2338 il_single = true;
2339 igzstream InData(argv[i+1]);
2340 if(!InData)
2342 cerr<<"Error opening Soap single list-file:"<<argv[i+1]<<endl;
2343 exit(EXIT_FAILURE);
2345 for(InData>>ctemp; InData; InData>>ctemp)
2347 iPos.push_back(ctemp);
2348 isPos.push_back(ctemp);
2350 for(register uint i(0); i<isPos.size(); ++i)
2352 igzstream InData_sub(isPos[i].c_str());
2353 if(!InData_sub)
2355 cerr<<"Error opening Soap single file:"<<isPos[i]<<endl;
2356 exit(EXIT_FAILURE);
2359 rightPara++;
2360 next++;
2362 if (strcmp(argv[i],"-il_soap") == 0)
2364 addon = true;
2365 il_soap = true;
2366 igzstream InData(argv[i+1]);
2367 if(!InData)
2369 cerr<<"Error opening Soap PE list-file:"<<argv[i+1]<<endl;
2370 exit(EXIT_FAILURE);
2372 for(InData>>ctemp; InData; InData>>ctemp)
2374 iPos.push_back(ctemp);
2375 ipPos.push_back(ctemp);
2377 for(register uint i(0); i<ipPos.size(); ++i)
2379 igzstream InData_sub(ipPos[i].c_str());
2380 if(!InData_sub)
2382 cerr<<"Error opening Soap PE file:"<<ipPos[i]<<endl;
2383 exit(EXIT_FAILURE);
2386 rightPara++;
2387 next++;
2389 if (strcmp(argv[i],"-ref") == 0)
2391 ::ref = true;
2392 igzstream InData(argv[i+1]);
2393 if(!InData)
2395 cerr<<"Error opening Reference list file: "<<argv[i+1]<<endl;
2396 exit(EXIT_FAILURE);
2398 for(InData>>temp; InData; InData>>temp)
2400 rPos = temp.substr(0,temp.rfind('/'));
2401 temp=temp.substr((temp.rfind('/')+1), string::npos);
2402 temp=temp.substr(0, temp.find(".fa"));
2403 chr.insert(temp);
2405 rightPara++;
2406 next++;
2408 if (strcmp(argv[i],"-refsingle") == 0 || strcmp(argv[i],"-rs") == 0)
2410 rPos = argv[i+1];
2411 rightPara++;
2412 next++;
2413 refsingle=true;
2415 if (strcmp(argv[i],"-reffastat") == 0 || strcmp(argv[i],"-rfs") == 0)
2417 rPos = argv[i+1];
2418 rightPara++;
2419 next++;
2420 reffastat=true;
2422 if (rightPara == 0)
2424 cout<<"Parameter error on \""<<argv[i]<<"\""<<endl;
2425 exit(EXIT_FAILURE);
2427 else
2428 cout<<endl<<argv[i]<<' ';
2430 cout<<endl<<"# End of parameters list"<<endl<<endl;
2432 //Parameter Logicals
2433 if((cvg && phy) || (!cvg && !phy))
2435 cerr<<"At least and only one of the \"-cvg\", \"-phy\" or \"-tag\" should be selected!"<<endl;
2436 exit(EXIT_FAILURE);
2438 if(pslsub && phy)
2440 cerr<<"-pslsub should only be used with -cvg other than -phy!"<<endl;
2441 exit(EXIT_FAILURE);
2443 if(pslquery && phy)
2445 cerr<<"-pslquery should only be used with -cvg other than -phy!"<<endl;
2446 exit(EXIT_FAILURE);
2448 if(maq && phy)
2450 cerr<<"-maq should only be used with -cvg other than -phy!"<<endl;
2451 exit(EXIT_FAILURE);
2453 if(maq && phy)
2455 cerr<<"-m8 should only be used with -cvg other than -phy!"<<endl;
2456 exit(EXIT_FAILURE);
2458 if(depth && depthsingle)
2460 cerr<<"Only one of the \"-depth\" or \"-depthsingle\" could be selected!"<<endl;
2461 exit(EXIT_FAILURE);
2463 if(depthsinglebin && depthsingle)
2465 cerr<<"Only one of the \"-depthsinglebin\" or \"-depthsingle\" could be selected!"<<endl;
2466 exit(EXIT_FAILURE);
2468 /*if(oPos.empty())
2470 cerr<<"\"-o\" should be defined!"<<endl;
2471 exit(EXIT_FAILURE);
2473 if(duplicate && depthinput)
2475 cerr<<"\"-duplication\" can't be used with \"-depthinput\"!"<<endl;
2476 exit(EXIT_FAILURE);
2478 if(sp && !pesupport && (!il_single || !il_soap))
2480 cerr<<"\"-sp\" should be used with \"-il_single\" & \"il_soap\"!"<<endl;
2481 exit(EXIT_FAILURE);
2483 if(sp && phy && !pesupport)
2485 cerr<<"\"-sp\" should not be used with \"-phy\""<<endl;
2486 exit(EXIT_FAILURE);
2488 if(!phy && pesupport)
2490 cerr<<"\"-pesupport\" should be used with \"-phy\""<<endl;
2491 exit(EXIT_FAILURE);
2493 if(refsingle)
2494 AddRefSingle();
2495 if(reffastat)
2496 AddFastat();
2498 //Opening General Output file
2499 if(oPos.empty())
2500 cerr<<"General output (-o) undefined, disabled!"<<endl;
2501 else
2503 OutData.open(oPos.c_str());
2504 if(!OutData)
2506 cerr<<"Error opening output file!"<<endl;
2507 exit(EXIT_FAILURE);
2512 void AddRefSingle()
2514 string temp;
2515 string ctemp;
2516 igzstream InData(rPos.c_str());
2517 if(!InData)
2519 cerr<<"Error opening Reference file!";
2520 exit(1);
2522 cout<<"Picking out segments from reference file..."<<endl
2523 <<"Number of segments:"<<setw(10)<<'0'<<flush;
2524 for(register uint i(0); InData ;++i)
2526 ulong seg_length(0);
2527 if(!i)
2528 getline(InData, ctemp, '\n');
2529 if(ctemp[0] == '>')
2531 temp=ctemp;
2532 temp=temp.substr(1,(temp.find_first_of(" \t")-1));
2533 while(true)
2535 getline(InData, ctemp, '\n');
2536 if(!InData) break;
2537 if(ctemp[0] == '>') break;
2538 seg_length += ctemp.size();
2540 totalBP += (seg_length - trim * 2);
2541 if(!nowarning && refsingle_marker.find(temp) != refsingle_marker.end())
2543 cerr<<"Error! There are repeating segment name in reference!"<<endl;
2544 exit(EXIT_FAILURE);
2546 refsingle_marker.insert(make_pair<string, ulong>(temp, seg_length + 1));
2547 cout<<"\b\b\b\b\b\b\b\b\b\b"
2548 <<setw(10)<<refsingle_marker.size()<<flush;
2550 else
2552 cerr<<"Marker \">\" not found in line "<<ctemp<<"in refsingle!"<<endl;
2553 exit(EXIT_FAILURE);
2555 chr.insert(temp);
2557 InData.clear();
2558 InData.close();
2559 cout<<endl;
2562 void AddFastat()
2564 string temp;
2565 string ctemp;
2566 igzstream InData(rPos.c_str());
2567 if(!InData)
2569 cerr<<"Error opening Reference file!";
2570 exit(1);
2572 cout<<"Picking out segments from FaStat file..."<<endl
2573 <<"Number of segments:"<<setw(10)<<'0'<<flush;
2574 for(register uint i(0); InData ;++i)
2576 string name, size, size_withoutn;
2577 for(;;)
2579 InData>>name>>size>>size_withoutn;
2580 if(!InData)
2581 break;
2582 //name=name.substr(1,(temp.find_first_of(" \t")-1));
2583 totalBP += (atol(size.c_str()) - trim * 2);
2584 if(!nowarning && refsingle_marker.find(temp) != refsingle_marker.end())
2586 cerr<<"Error! There are repeating segment name in FaStat!"<<endl;
2587 exit(EXIT_FAILURE);
2589 refsingle_marker.insert(make_pair<string, ulong>(name, atol(size.c_str()) + 1));
2590 cout<<"\b\b\b\b\b\b\b\b\b\b"
2591 <<setw(10)<<refsingle_marker.size()<<flush;
2592 chr.insert(name);
2595 InData.clear();
2596 InData.close();
2597 cout<<endl;
2600 void Output_list()
2602 int i(1);
2603 cout<<"Output List:"<<endl;
2604 cout<<setw(2)<<i++<<": "<<oPos<<endl;
2605 if(depth)
2606 cout<<setw(2)<<i++<<": "<<dPos<<"/*"<<endl;
2607 if(depthsingle)
2608 cout<<setw(2)<<i++<<": "<<dPos<<endl;
2609 if(cdsinput)
2610 cout<<setw(2)<<i++<<": "<<cdsPos_out<<endl;
2613 void ComfirmAllDigit(const char* rhs)
2615 for(register uint i(0); i<strlen(rhs); ++i)
2617 if(!isdigit(rhs[i]))
2619 cerr<<rhs<<" is not a number!"<<endl;
2620 exit(EXIT_FAILURE);
2625 inline uint SeperateCigar(string & str, vector<uint> & vec)
2627 string pos;
2628 uint character(0);
2629 uint accumulate(0);
2630 for(register int i(0); i < str.size(); ++i)
2632 if(isalpha(str[i]) || str[i] == '-')
2634 if(!pos.empty())
2635 accumulate += atoi(pos.c_str());
2636 vec.push_back(accumulate + character);
2637 pos.clear();
2638 ++character;
2640 else
2641 pos += str[i];
2644 return character;