2 ******************************************************************************
7 *CREATE DATE : 2010-8-9
8 *CLASS NAME: FileListManager
9 *FUNCTION : a class that can control file list. support open, readline, close etc
11 *FILE NAME : FileListManager.cpp
12 *UPDATE DATE : 2010-8-19
13 *UPDATE BY : Bill Tang
14 *UPDATE DATE : 2010-9-3
15 *UPDATE BY : Bill Tang
17 *UPDATE: 2010-9-3 add class member m_endfile_count, change functions readWin,
18 * readWinFromSoap and readWinFromSam
19 * so that the number of input files that have reached their end can be counted.
20 *UPDATE: 2010-10-4 change function closeAliFiles so that it also closes the matrix files and deletes all the file objects.
21 *UPDATE: 2010-10-8 change function openCnsFile: when consensus output is not needed, create empty stream objects instead.
22 *UPDATE: 2010-11-2 change the type of m_soap_file_vec to vector<igzstream*>;
23 change the interfaces that are related to soap files.
25 *UPDATE: 2010-11-15 add the sem's control function.
26 *******************************************************************************
29 #include "FileListManager.h"
31 #include <sys/resource.h>
// Constructor: starts with empty soap, sam/bam and consensus handle vectors.
// NOTE(review): m_matrix_file_vec is not cleared in the lines visible here;
// as a member vector it is presumably default-constructed empty -- confirm.
34 FileListManager::FileListManager()
38 m_soap_file_vec
.clear();
39 m_sam_file_vec
.clear();
40 m_cns_file_vec
.clear();
// Destructor: walks the four handle vectors in turn -- consensus output
// streams, soap input streams, sam/bam controllers and matrix file streams.
// NOTE(review): the loop bodies are not visible in this listing; presumably
// each element is deleted -- confirm against the full file. closeAliFiles()
// already deletes and clears the soap/sam/matrix vectors, so after it has run
// those three loops see empty vectors.
47 FileListManager::~FileListManager()
49 /*delete f_output stream*/
50 for (vector
<gzoutstream
*>::iterator iter
= m_cns_file_vec
.begin();
51 iter
!= m_cns_file_vec
.end();
57 /*delete f_input stream*/
58 for (vector
<igzstream
*>::iterator iter
= m_soap_file_vec
.begin();
59 iter
!= m_soap_file_vec
.end();
// sam/bam controllers
66 for (vector
<SamCtrl
*>::iterator iter
= m_sam_file_vec
.begin();
67 iter
!= m_sam_file_vec
.end();
// matrix file streams
73 for (vector
<fstream
*>::iterator iter
= m_matrix_file_vec
.begin();
74 iter
!= m_matrix_file_vec
.end();
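84 * FUNCTION: open the consensus output files named after the entries of a list file.
85 * PARAMETER: path: the list file, one input path per line. outdir: the directory the consensus files are created in. mode: the open mode. para: the run parameters.
86 * RETURN: the number of consensus files that were opened. BASEFILE_ERROR when the list file cannot be opened. CNSFILE_ERROR when a consensus file cannot be opened.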
// Fills m_cns_file_vec with one output stream per input file and returns the
// vector size. Two paths are visible:
//   1. a loop over m_file_number that pushes unopened stream objects and
//      returns early (the "no consensus output" case per the comment below);
//   2. the normal path, which reads `path` line by line and opens
//      outdir + <file name part of the line> + ".consus" or ".consus.gz".
// NOTE(review): the conditions selecting path 1 vs 2, and myogzstream vs
// myofstream, are not visible in this listing (presumably flags on `para`).
// NOTE(review): `mode` is not used in any visible line.
88 int FileListManager::openCnsFile(const string
& path
, const string
& outdir
, const char* mode
, Parameter
* para
)
90 string cns_Path
, cns_name
; //the one consensus file name
91 gzoutstream
* f_output
;
93 // if consensus output is not needed, create empty (unopened) stream objects.
96 for (int i
= 0; i
< m_file_number
; ++i
)
98 // create new ofstream object.update by zhukai on 2010-12-09
101 f_output
= new myogzstream();
105 f_output
= new myofstream();
107 m_cns_file_vec
.push_back(f_output
);
109 return m_cns_file_vec
.size();
112 m_base_file
.open(path
.c_str()); //open filelist
113 if (!m_base_file
) //can not open filelist
115 cerr
<< "Cannot open file:" << path
<< endl
;
116 return BASEFILE_ERROR
;
// Shell command text for creating the output directory.
// NOTE(review): the call that runs `cmd` is not visible here; if it goes
// through a shell, an outdir containing spaces or metacharacters will break.
119 string cmd
= "mkdir -p " + outdir
;
122 /*open each cnsfile and push in cns vector*/
123 while (getline(m_base_file
, cns_Path
))
125 //f_output = new gzoutstream();
127 /*find cons name from cns path*/
// substr(pos) keeps the leading '/', so the name is appended to outdir as-is.
// NOTE(review): if the line contains no '/', rfind returns npos, and
// substr(npos) throws std::out_of_range.
128 int pos
= cns_Path
.rfind('/');
130 //update by zhukai on 2010-12-09
133 f_output
= new myogzstream();
134 cns_name
= outdir
+ cns_Path
.substr(pos
, string::npos
) + ".consus.gz";
138 f_output
= new myofstream();
139 cns_name
= outdir
+ cns_Path
.substr(pos
, string::npos
) + ".consus";
144 /*open the output file, then put it in to cns vector*/
145 f_output
->open(cns_name
.c_str(),std::ios::out
);
147 if (!f_output
->is_open())
149 cerr
<< "Cannot open file:" << cns_name
<< endl
;
// NOTE(review): f_output has not been pushed into m_cns_file_vec yet, so this
// return drops the only pointer to it; m_base_file is also left open.
150 return CNSFILE_ERROR
;
153 // set the output format. 2010-12-15 Bill
154 f_output
->set_f(std::ios::showpoint
);
155 m_cns_file_vec
.push_back(f_output
);
158 m_base_file
.close(); //close consensus files list
160 return m_cns_file_vec
.size();
166 * FUNCTION: open all the file in the file list
167 * PARAMETER: listfilename: the path of file that contain list of infile. mode: the open mode.
// Dispatches on the first character of `mode`: 'r' opens the list as sam/bam
// files via openSamFile, anything else opens it as soap files via
// openSoapFile. The callee's return value (file count or error code) is
// passed straight back.
// NOTE(review): mode[0] is read without a null check on `mode`.
170 int FileListManager::openAli(const string
& listfilename
,const char* mode
)
172 if (mode
[0] == 'r') //is sam/bam file
174 return openSamFile(listfilename
,mode
); //open sam/bam file ,return file number
178 //cerr << "open soap file" <<endl;
179 return openSoapFile(listfilename
); //open soap file , return file number
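186 * FUNCTION: open the sam/bam files named in a list file
187 * PARAMETER: listfilename: the path of the file that lists the sam/bam files. mode: the open mode.
188 * RETURN: the number of sam/bam files opened. BASEFILE_ERROR when cannot open file list. SAMFILE_ERROR when cannot open sam file.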
// Opens every sam/bam file named in `listfilename`, stores the handles in
// m_sam_file_vec, sets m_file_number to the handle count and returns it.
// Entries are read with operator>>, i.e. whitespace-separated tokens, so a
// path containing a space is split into two entries.
// NOTE(review): the declarations of `filepath`/`sctrl` and the check guarding
// the first error message are not visible in this listing.
190 int FileListManager::openSamFile(const string
& listfilename
, const char* mode
)
195 m_base_file
.open(listfilename
.c_str()); //open sam/bam files list
198 cerr
<< "Cannot open file:" << listfilename
<< endl
;
199 return BASEFILE_ERROR
;
202 /*open each sam/bam file in the filelist and input sam vector*/
203 while (m_base_file
>> filepath
)
206 // if(setlimit(m_sam_file_vec.size()))
209 sctrl
= new SamCtrl();
210 if (!(sctrl
->open(filepath
.c_str(), mode
)))
212 cerr
<< "Cannot open file:" << filepath
<< endl
;
// NOTE(review): sctrl has not been pushed into m_sam_file_vec yet, so this
// return drops the only pointer to it; m_base_file is also left open.
213 return SAMFILE_ERROR
;
215 m_sam_file_vec
.push_back(sctrl
);
219 // return SETLIMIT_ERROR;
222 m_file_number
= m_sam_file_vec
.size(); // bam/sam file number in file list
223 m_base_file
.close(); //close sam/bam files list
225 return m_file_number
;
231 * FUNCTION: open the soap format files named in a list file.
233 * RETURN: the number of soap files opened. BASEFILE_ERROR when cannot open file list. SOAPFILE_ERROR when cannot open soap file.
// Opens every soap file named in `listfilename` as an igzstream, stores the
// handles in m_soap_file_vec, sets m_file_number to the handle count and
// returns it. Entries are read with operator>>, i.e. whitespace-separated.
// NOTE(review): the declarations of `filepath`/`f_input` and both failure
// checks are not visible in this listing; only the error branches are.
235 int FileListManager::openSoapFile(const string
& listfilename
)
240 m_base_file
.open(listfilename
.c_str()); //open soap filelist
243 cerr
<<"Cannot open file:" <<listfilename
<<endl
;
244 return BASEFILE_ERROR
;
246 /*open each soap file in the filelist and input soap vector*/
247 while (m_base_file
>> filepath
)
250 //if ( setlimit( m_soap_file_vec.size() ) )
252 f_input
= new igzstream();
253 f_input
->open(filepath
.c_str()) ;
256 cerr
<< "Cannot open file:" << filepath
<< endl
;
// NOTE(review): f_input has not been pushed into m_soap_file_vec yet, so this
// return drops the only pointer to it; m_base_file is also left open.
257 return SOAPFILE_ERROR
;
259 m_soap_file_vec
.push_back(f_input
);
264 // return SETLIMIT_ERROR;
268 m_file_number
= m_soap_file_vec
.size(); // soap file number in file list
269 m_base_file
.close(); //close soap listfile
271 return m_file_number
;
277 * FUNCTION: read a line from the *vec[index]
278 * PARAMETER: line: the read buffer. index: the index of file handles vec.
279 * RETURN: the length of string line.
// Takes the output buffer `line` by reference and the handle index by value.
// NOTE(review): the body of this method is not visible in this listing, so
// which handle vector it reads from cannot be confirmed here.
281 int FileListManager::readLine(string
& line
, const int index
)
289 * FUNCTION: close all the alignment (soap, sam/bam) files and the matrix files
292 * UPDATE ON 2010-10-4
293 * delete the vector object and add delete m_matrix_file_vec.
// Closes and deletes every soap, sam/bam and matrix handle, then clears the
// three vectors so no dangling pointers remain in them.
// NOTE(review): the declaration, reset and increment of `vec_size` are not
// visible in this listing; each loop presumably restarts from 0 -- confirm.
295 void FileListManager::closeAliFiles(void)
/*close soap files*/
300 while (vec_size
< m_soap_file_vec
.size())
302 m_soap_file_vec
[vec_size
]->close();
303 delete m_soap_file_vec
[vec_size
];
308 /*close sam/bam files*/
309 while(vec_size
< m_sam_file_vec
.size())
311 m_sam_file_vec
[vec_size
]->close();
312 delete m_sam_file_vec
[vec_size
];
317 /*close matrix files*/
318 while(vec_size
< m_matrix_file_vec
.size())
320 m_matrix_file_vec
[vec_size
]->close();
321 delete m_matrix_file_vec
[vec_size
];
// drop the now-deleted pointers
325 m_soap_file_vec
.clear();
326 m_sam_file_vec
.clear();
327 m_matrix_file_vec
.clear();
332 * FUNCTION: close all the consensus output files
// Calls close() on every stream in m_cns_file_vec.
// NOTE(review): no delete or clear() is visible here, unlike closeAliFiles;
// the stream objects presumably stay owned by the vector -- confirm.
336 void FileListManager::closeCnsFiles(void)
340 while (vec_size
< m_cns_file_vec
.size())
342 m_cns_file_vec
[vec_size
]->close();
351 * FUNCTION: read lines to the readwin_vec. The lines' start position must inside a win size.
352 * When the lines' end position exceed the win,put it to the next win.
353 * PARAMETER: readwin_vec: the vector of Readwin objs. storing the info of every readwin.
354 * RETURN: COUNT_ERROR when readwin_vec.size() not equal to the files' number.
// Fills every Readwin in `readwin_vec` from its matching input file.
// Visible flow:
//   - compare readwin_vec.size() with m_cns_file_vec.size();
//   - wait on para->sem_read and para->sem_readwin_return;
//   - read from the soap files if their count matches readwin_vec, otherwise
//     from the sam/bam files if theirs does, otherwise set INPUT_ERROR;
//   - publish the outcome in para->ret and post para->sem_call_cns.
// NOTE(review): several lines are missing from this listing (the declaration
// of `ret`, the size-mismatch handling, the FILE_END branch bodies, any
// enclosing loop and the return statements); comments below describe only
// what is visible.
356 int FileListManager::readWin(vector
<Readwin
>& readwin_vec
, Parameter
* para
)
358 //CThreadPool threadpool(para->CPU);
359 if (readwin_vec
.size() != m_cns_file_vec
.size())
361 // file size not right.
365 // get signal pointers.
366 sem_t
* sem_read_p
= &(para
->sem_read
);
367 sem_t
* sem_call_cns_p
= &(para
->sem_call_cns
);
368 sem_t
* sem_readwin_return_p
= &(para
->sem_readwin_return
);
370 ////CThreadPool threadpool(cpu);
371 //READ_WIN_ARGS * read_win_args = NULL;
372 //Read_win_Task * rw_Task = NULL;
373 //// two vector used to release resource.
374 //vector<READ_WIN_ARGS*> read_win_args_vec ;
375 //vector<Read_win_Task*> rw_Task_vec ;
379 // waiting for the signal.
380 sem_wait(sem_read_p
);
381 sem_wait(sem_readwin_return_p
);
// soap input: one soap stream per Readwin
383 if (m_soap_file_vec
.size() == readwin_vec
.size())
385 for (int i
= 0; i
< readwin_vec
.size(); ++i
)
387 // read records from soap file to the win.
388 ret
= readWinFromSoap(readwin_vec
[i
], *m_soap_file_vec
[i
]);
// a new chromosome is recorded in para->ret instead of returning early
390 if (COME_NEW_CHR
== ret
)
392 para
->ret
= COME_NEW_CHR
;
394 //return COME_NEW_CHR;
396 else if (FILE_END
== ret
)
402 // int *ret = new int[m_file_number];
403 // memset(ret, 0, sizeof(int) * m_file_number);
404 // for (int i = 0; i < readwin_vec.size(); ++i)
406 // // read records from soap file to the win.
407 // read_win_args = new READ_WIN_ARGS(&readwin_vec[i], m_soap_file_vec[i],NULL, ret, i);
408 // read_win_args_vec.push_back(read_win_args);
409 // rw_Task = new Read_win_Task();
410 // rw_Task->SetData(read_win_args);
411 // rw_Task_vec.push_back(rw_Task);
412 // threadpool.AddTask(rw_Task);
414 // while (threadpool.getTaskSize() != 0)
419 // for (int j = 0; j < rw_Task_vec.size(); ++j)
421 // delete read_win_args_vec[j];
422 // delete rw_Task_vec[j];
424 // read_win_args_vec.clear();
425 // rw_Task_vec.clear();
426 ////cerr << __FUNCTION__ << __LINE__ << endl;
428 // for (int i = 0; i < readwin_vec.size(); ++i)
430 // if (COME_NEW_CHR == ret[i])
432 // para->ret = COME_NEW_CHR;
433 // //return COME_NEW_CHR;
435 // else if (FILE_END == ret[i])
437 // m_endfile_count ++;
440 ////cerr << __FUNCTION__ << __LINE__ << endl;
// sam/bam input: one SamCtrl per Readwin
443 else if (m_sam_file_vec
.size() == readwin_vec
.size())
445 for (int i
= 0; i
< readwin_vec
.size(); ++i
)
447 // read records from sam/bam file to the win.
448 ret
= readWinFromSam(readwin_vec
[i
], *m_sam_file_vec
[i
]);
449 if (COME_NEW_CHR
== ret
)
451 para
->ret
= COME_NEW_CHR
;
453 //return COME_NEW_CHR;
455 else if (FILE_END
== ret
)
460 //int *ret = new int[m_file_number];
461 //memset(ret, 0, sizeof(int) * m_file_number);
462 //for (int i = 0; i < readwin_vec.size(); ++i)
464 // read_win_args = new READ_WIN_ARGS(&readwin_vec[i], NULL, m_sam_file_vec[i], ret, i);
465 // read_win_args_vec.push_back(read_win_args);
466 // rw_Task = new Read_win_Task();
467 // rw_Task->SetData(read_win_args);
468 // rw_Task_vec.push_back(rw_Task);
469 // threadpool.AddTask(rw_Task);
471 //while (threadpool.getTaskSize() != 0)
476 //for (int j = 0; j < rw_Task_vec.size(); ++j)
478 // delete read_win_args_vec[j];
479 // delete rw_Task_vec[j];
481 //read_win_args_vec.clear();
482 //rw_Task_vec.clear();
484 //for (int i = 0; i < readwin_vec.size(); ++i)
486 // if (COME_NEW_CHR == ret[i])
488 // para->ret = COME_NEW_CHR;
489 // // return COME_NEW_CHR;
491 // else if (FILE_END == ret[i])
493 // m_endfile_count ++;
500 // something wrong with input files.
501 para
->ret
= INPUT_ERROR
;
502 //return INPUT_ERROR;
// m_endfile_count equal to m_file_number means every input file is exhausted
505 if (m_endfile_count
!= m_file_number
)
512 // all file reach tail.
513 para
->ret
= FILE_END
;
514 sem_post(sem_call_cns_p
);
// wake the consensus-calling side
520 sem_post(sem_call_cns_p
);
526 // get the soap file handle pointer m_soap_file_vec[index]
529 * FUNCTION: get the soap file handle pointer m_soap_file_vec[index]
530 * PARAMETER: index: the vector index.
531 * RETURN: return soap file pointer m_soap_file_vec[index].failed return INDEX_OVER_FLOW
// Bounds-checked accessor for m_soap_file_vec: returns the stored pointer for
// a valid index, INDEX_OVER_FLOW otherwise.
// NOTE(review): INDEX_OVER_FLOW is returned from a pointer-returning function,
// so it is presumably a null pointer constant -- confirm in the header.
533 igzstream
* FileListManager::getSoapFile(const int index
)
535 if (index
< 0 || index
>= m_soap_file_vec
.size())
538 return INDEX_OVER_FLOW
;
541 return m_soap_file_vec
[index
];
547 * FUNCTION: get the sam/bam file handle pointer m_sam_file_vec[index]
548 * PARAMETER: index: the vector index.
549 * RETURN: return sam/bam file pointer m_sam_file_vec[index].failed return INDEX_OVER_FLOW
// Bounds-checked accessor for m_sam_file_vec: returns the stored pointer for
// a valid index, INDEX_OVER_FLOW otherwise.
// NOTE(review): INDEX_OVER_FLOW is returned from a pointer-returning function,
// so it is presumably a null pointer constant -- confirm in the header.
551 SamCtrl
* FileListManager::getSamFile(const int index
)
553 if (index
< 0 || index
>= m_sam_file_vec
.size())
556 return INDEX_OVER_FLOW
;
559 return m_sam_file_vec
[index
];
565 * FUNCTION: read record from soapfile and send to the readwin.
566 * PARAMETER: readwin: the Readwin object that would be processed.
567 * soapfile: the input file handle.
568 * RETURN: FILE_END when file is over else 0.
// Advances `readwin` to its next window, then (if the window accepts reads)
// feeds it lines from `soapfile` one at a time and branches on the status
// returned by Readwin::addRead.
// NOTE(review): the body of each status branch and the return statements are
// not visible in this listing. The final setLastPos(FILE_END) is presumably
// reached when getline fails, i.e. the file is exhausted -- confirm.
570 int FileListManager::readWinFromSoap(Readwin
& readwin
, igzstream
& soapfile
)
572 readwin
.winChange(); // start from next win.
573 std::string line
; // a temp string object.
574 int ret
; // store the flag from function Readwin::addRead.
577 // judge if the win can be added.
578 if (readwin
.isAbleToAdd())
580 // read record from file.
581 while (getline(soapfile
,line
))
// add record to the Readwin object.
583 ret
= readwin
.addRead(line
);
585 if (ADD_SUCCESSFUL
== ret
)
590 else if (READ_EXCEED_WIN
== ret
)
595 else if (READ_POS_ERROR
== ret
)
597 // something error with the record's position.
598 cerr
<< "ERROR : Something wrong with the read's position with line:" << endl
;
599 cerr
<< line
<< endl
;
603 else if (COME_NEW_CHR
== ret
)
605 if (readwin
.getLastCount() == m_file_number
)
607 // all the samples are reach a new chromosome.
// mark this window's input as finished
616 readwin
.setLastPos(FILE_END
);
626 * FUNCTION: read record from samCtrl and send to the readwin.
627 * PARAMETER: readwin: the Readwin object that would be processed.
628 * samCtrl: the input file handle.
629 * RETURN: FILE_END when file is over else 0.
// sam/bam counterpart of readWinFromSoap: advances `readwin` to its next
// window, then (if the window accepts reads) pulls records from `samCtrl`
// until readline returns -1, converts each with alignment_format and branches
// on the status returned by Readwin::addRead.
// NOTE(review): the body of each status branch and the return statements are
// not visible in this listing. The final setLastPos(FILE_END) is presumably
// reached when readline reports -1 -- confirm.
631 int FileListManager::readWinFromSam(Readwin
& readwin
, SamCtrl
& samCtrl
)
633 readwin
.winChange(); // start from next win.
634 std::string line
; // a temp string object.
635 int ret
; // store the flag from function Readwin::addRead.
636 int size
= 0; // store the readline flag.
638 // judge if the win can be added.
639 if (readwin
.isAbleToAdd())
641 // read record from file.
642 while ((size
= samCtrl
.readline(line
)) != -1)
// convert the sam/bam record with alignment_format before adding it
644 line
= alignment_format(line
);
645 // add record to the Readwin object.
646 ret
= readwin
.addRead(line
);
648 if (ADD_SUCCESSFUL
== ret
)
653 else if (READ_EXCEED_WIN
== ret
)
658 else if (READ_POS_ERROR
== ret
)
660 // something error with the record's position.
661 cerr
<< "ERROR : Something wrong with the read's position with line:" << endl
;
662 cerr
<< line
<< endl
;
666 else if (COME_NEW_CHR
== ret
)
668 if (readwin
.getLastCount() == m_file_number
)
670 // all the samples are reach a new chromosome.
// mark this window's input as finished
680 readwin
.setLastPos(FILE_END
);
690 // get the file number
693 * FUNCTION: get the file number
695 * RETURN: m_file_number
// Returns m_file_number, which openSamFile/openSoapFile set to the number of
// input files they opened.
697 int FileListManager::getFileNum(void)
699 return m_file_number
;
705 * FUNCTION: get the consensus file handle pointer m_cns_file_vec[index]
706 * PARAMETER: index: the vector index.
707 * RETURN: return consensus file pointer m_cns_file_vec[index].failed return INDEX_OVER_FLOW
// Bounds-checked accessor for m_cns_file_vec: returns the stored pointer for
// a valid index, INDEX_OVER_FLOW otherwise.
// NOTE(review): INDEX_OVER_FLOW is returned from a pointer-returning function,
// so it is presumably a null pointer constant -- confirm in the header.
709 gzoutstream
* FileListManager::getCnsFile(const int index
)
711 if (index
< 0 || index
>= m_cns_file_vec
.size())
714 return INDEX_OVER_FLOW
;
717 return m_cns_file_vec
[index
];
723 * FUNCTION: open matrix files with open_mode mode
724 * PARAMETER: matrix_list: the output or input matrix files path. mode: the open mode.
725 * alignment_list: alignment file list.
726 * RETURN: the number of matrix files that be opened.
// Fills m_matrix_file_vec and returns its size. Two modes are visible:
//   - mode == std::ios::in: `matrix_list` is a list file of existing matrix
//     files; each entry is opened for reading.
//   - otherwise, with a non-empty `alignment_list`: `matrix_list` is an
//     output directory; for every line of `alignment_list` a file
//     matrix_list + <file name part> + ".matrix" is opened for writing and
//     its name is also written to matrix_list + "/matrix_list".
// NOTE(review): the declarations/allocation of `fsp` and `filepath`, and the
// failure checks ahead of each error message, are not visible in this listing.
// NOTE(review): std::ios_base::open_mode is a legacy typedef, not the standard
// std::ios_base::openmode; confirm it exists on the target toolchain.
// NOTE(review): both strings are passed by value (copied on every call).
728 int FileListManager::openMatrixFile(const std::string matrix_list
, std::ios_base::open_mode mode
, const std::string alignment_list
)
731 if (mode
== std::ios::in
) //matrix file is existed , read it
734 m_base_file
.open(matrix_list
.c_str()); //open matrix filelist
737 cerr
<<"Cannot open file:" << matrix_list
<< endl
;
738 return BASEFILE_ERROR
;
740 //cerr <<"open matrix file 642"<<endl;
741 /*open each matrix file in the filelist and put it in the matrix vector*/
742 while (m_base_file
>> filepath
)
744 //cerr <<"open matrix file 646"<<endl;
746 //cerr <<"open matrix file 647"<<endl;
747 fsp
->open(filepath
.c_str(), fstream::in
) ;
748 //cerr <<"open matrix file 649"<<endl;
751 cerr
<< "Cannot open file:" << filepath
<< endl
;
// NOTE(review): a matrix-file failure is reported with the soap error code.
752 return SOAPFILE_ERROR
;
754 m_matrix_file_vec
.push_back(fsp
);
760 else if (alignment_list
!= "")
762 string mat_Path
, mat_name
; //the one matrix file name
764 m_base_file
.open(alignment_list
.c_str()); //open alignment file list
768 cerr
<< "Cannot open file:" << alignment_list
<< endl
;
769 return BASEFILE_ERROR
;
// Shell command text for creating the output directory.
// NOTE(review): the call that runs `cmd` is not visible here; if it goes
// through a shell, a path with spaces or metacharacters will break.
772 string cmd
= "mkdir -p " + matrix_list
;
774 string list_path
= matrix_list
+ "/matrix_list";
775 ofstream
list_out(list_path
.c_str());
777 /*open each matrix file and push it in the matrix vector*/
778 while (getline(m_base_file
, mat_Path
))
781 /*derive the matrix file name from the alignment file path*/
// substr(pos) keeps the leading '/'.
// NOTE(review): if the line contains no '/', rfind returns npos, and
// substr(npos) throws std::out_of_range.
782 int pos
= mat_Path
.rfind('/');
783 mat_name
= matrix_list
+ mat_Path
.substr(pos
, string::npos
) + ".matrix";
784 /*open the matrix output file*/
785 fsp
->open(mat_name
.c_str(), fstream::out
);
789 cerr
<< "Cannot open file:" << mat_name
<< endl
;
// NOTE(review): a matrix-file failure is reported with the consensus error code.
790 return CNSFILE_ERROR
;
// record the created matrix file in the generated list
793 list_out
<< mat_name
<< endl
;
794 m_matrix_file_vec
.push_back(fsp
);
800 return m_matrix_file_vec
.size();
806 * FUNCTION: get the matrix file handle pointer m_matrix_file_vec[index]
807 * PARAMETER: index: the vector index.
808 * RETURN: return matrix file pointer m_matrix_file_vec[index].failed return INDEX_OVER_FLOW
// Bounds-checked accessor for m_matrix_file_vec: returns the stored pointer
// for a valid index, INDEX_OVER_FLOW otherwise.
// Unlike the other getters there is no explicit `index < 0` test; a negative
// int is converted to a large unsigned value in the comparison with size(),
// so it is normally rejected by the same check.
// NOTE(review): INDEX_OVER_FLOW is returned from a pointer-returning function,
// so it is presumably a null pointer constant -- confirm in the header.
810 fstream
* FileListManager::getMatrixFile(const int index
)
812 if (index
>= m_matrix_file_vec
.size())
815 return INDEX_OVER_FLOW
;
818 return m_matrix_file_vec
[index
];
823 * FUNCTION: set max limit file number
824 * PARAMETER: filenumber : open filenumber now
825 * RETURN: return SETLIMIT_ERROR or return SETLIMIT_OK
827 //int FileListManager::setlimit(int filenumber)
831 // /*if open file number up to limit */
832 // if (filenumber >= r.rlim_max)
835 // r.rlim_cur = 2000;
836 // r.rlim_max = 2000;
838 // /*if set limit error*/
839 // if (setrlimit(RLIMIT_NOFILE, &r) < 0)
841 // cerr << "setrlimit error\n";
842 // return SETLIMIT_ERROR;
845 // return SETLIMIT_OK;
// Task body: unpacks the READ_WIN_ARGS stored in m_ptrData and reads one
// window, writing the status into ret[index]. A NULL m_sam_file_p selects the
// soap reader; otherwise the sam/bam reader is used.
// NOTE(review): `fileListManager` is not declared in any visible line; it is
// presumably a global FileListManager instance -- confirm.
// NOTE(review): the return statement is not visible in this listing.
852 int Read_win_Task::Run()
854 READ_WIN_ARGS
*read_win_args
= (READ_WIN_ARGS
*)this->m_ptrData
;
856 Readwin
& readwin
= *(read_win_args
->readwin_p
);
857 igzstream
* m_soap_file
= read_win_args
->m_soap_file_p
;
858 SamCtrl
* m_sam_file
= read_win_args
->m_sam_file_p
;
859 int * ret
= read_win_args
->ret_p
;
860 int index
= read_win_args
->index
;
// no sam handle supplied: the input is a soap file
861 if (read_win_args
->m_sam_file_p
== NULL
)
863 ret
[index
] = fileListManager
.readWinFromSoap(readwin
, *m_soap_file
);
// otherwise read from the sam/bam handle
867 ret
[index
] = fileListManager
.readWinFromSam(readwin
,* m_sam_file
);
874 * FUNCTION: a thread function used to run the FileListManager::readWin function.
875 * PARAMETER: __Args the parameter structure.
// Thread entry point: casts the opaque argument to BIG_READ_WIN_ARGS and calls
// readWin on the FileListManager it carries, passing the bundled Readwin
// vector and Parameter pointer. readWin's return value is discarded here.
// NOTE(review): `__Args` contains a double underscore, which is a reserved
// identifier form in C++.
// NOTE(review): the return statement is not visible in this listing.
878 void *_flieListManager_readWin(void * __Args
)
881 BIG_READ_WIN_ARGS
* args
= (BIG_READ_WIN_ARGS
*)__Args
;
883 args
->fileListManager
->readWin((*args
->readwin_vec
), args
->para
);