modified: nfig1.py
[GalaxyCodeBases.git] / BGI / BASE / src / 2bwt / 2BWT-Builder.c
blobb1809a8c32ffd3d91ae2bb1ff695a2b7ddb1f270
/*

   2BWT-Builder.c        Build index for FASTA database

   This program builds index for FASTA database for use of BWTBlastn.

   Copyright (C) 2006, Wong Chi Kwong.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

   Date   : 19th June 2011
   Author : Edward MK Wu
   Change : Packaging 2BWT library as a separate product.
            Thus, changing all references to 2bwt lib to subdirectory.

   Date   : 23rd October 2011
   Author : Edward MK Wu
   Change : Fix a rounding error when building reverse packed sequence.

*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "TypeNLimit.h"
#include "BWTConstruct.h"
#include "MiscUtilities.h"
#include "DNACount.h"
#include "TextConverter.h"
#include "MemManager.h"
#include "iniparser.h"
#include "HSP.h"
#include "Timing.h"
47 // Database and ini
48 dictionary *ParseInput(int argc, char** argv);
49 void ParseIniFile(char *iniFileName);
50 void ProcessIni();
51 void ValidateIni();
52 void PrintIni();
53 void PrintShortDesc();
54 void PrintHelp();
56 void ProcessFileName(char *outputFileName, const char *inputFileName, const char *databaseName);
58 // Parameters
59 char IniFileName[MAX_FILENAME_LEN+1];
60 int Confirmation;
62 // BuildTasks parameters
63 int ParseFASTA = TRUE;
64 int BuildBWT = TRUE;
65 int BuildSaValue = TRUE;
66 int BuildSaIndex = FALSE;
68 // Memory parameters
69 unsigned long long PoolSize = 2097152; // 2M - fixed; not configurable through ini
71 // Display parameters
72 int ShowProgress = FALSE;
74 // Database parameters
75 char FASTAFileName[MAX_FILENAME_LEN+1] = "";
76 char DatabaseName[MAX_FILENAME_LEN+1] = "";
77 char AnnotationFileName[MAX_FILENAME_LEN+1] = "*.index.ann";
78 char AmbiguityFileName[MAX_FILENAME_LEN+1] = "*.index.amb";
79 char TranslateFileName[MAX_FILENAME_LEN+1] = "*.index.tra";
80 char PackedDNAFileName[MAX_FILENAME_LEN+1] = "*.index.pac";
81 char BWTCodeFileName[MAX_FILENAME_LEN+1] = "*.index.bwt";
82 char BWTOccValueFileName[MAX_FILENAME_LEN+1] = "*.index.fmv";
83 char SaValueFileName[MAX_FILENAME_LEN+1] = "*.index.sa";
84 char SaIndexFileName[MAX_FILENAME_LEN+1] = "*.index.sai";
86 char RevPackedDNAFileName[MAX_FILENAME_LEN+1] = "*.index.rev.pac";
87 char RevBWTCodeFileName[MAX_FILENAME_LEN+1] = "*.index.rev.bwt";
88 char RevBWTOccValueFileName[MAX_FILENAME_LEN+1] = "*.index.rev.fmv";
90 // Parse FASTA parameters
91 unsigned long long FASTARandomSeed = 0;
92 int MaskLowerCase = FALSE;
94 // Build BWT parameters
95 unsigned int OccValueFreq = 256;
96 float TargetNBit = 2.5;
97 unsigned long long InitialMaxBuildSize = 10000000;
98 unsigned long long IncMaxBuildSize = 10000000;
100 // Build SA value parameters
101 unsigned int SaValueFreq = 8;
103 // Build SA index parameters
104 unsigned int SaIndexNumOfChar = 12;
/*
 * Debug helper: print the lowest `len` bits of `seq` to stdout, most
 * significant of those bits first.  A space follows every group of four
 * digits and the line is terminated with '\n'.
 *
 * seq : value whose bits are printed
 * len : number of low-order bits to print; clamped to 0..64 so that the
 *       64-entry scratch buffer can never be indexed out of range
 */
void printBinary(unsigned long long seq,int len) {
    char text[64];
    int i,j=63;

    if (len < 0) {
        len = 0;
    }
    if (len > 64) {
        len = 64;
    }

    // Peel bits off the low end and store them right-to-left, so that
    // text[j+1..63] ends up holding the digits in printing order.
    for (i=0;i<len;i++) {
        text[j--]=(char)(seq %2);
        seq>>=1;
    }

    for (i=j+1;i<64;i++) {
        putchar(text[i] ? '1' : '0');
        // (i-j-1) is the zero-based position of the digit just printed.
        if ((i-j-1) % 4 ==3) {
            putchar(' ');
        }
    }
    putchar('\n');
}
121 void BuildReversePacked(const char *inputFileName, unsigned long long *textLength, const unsigned int convertToWordPacked, const unsigned int trailerBufferInWord) {
123 FILE *inputFile;
124 FILE *outputFile;
125 unsigned char * packedText;
126 unsigned char * revPackedText;
127 off64_t packedFileLen;
128 unsigned char lastByteLength;
129 long long i,j;
130 int k,l;
132 inputFile = (FILE*)(FILE*)fopen64(inputFileName, "rb");
133 outputFile = (FILE*)(FILE*)fopen64(RevPackedDNAFileName, "wb");
135 if (inputFile == NULL) {
136 fprintf(stderr, "BuildReversePacked() : Cannot open inputFileName!\n");
137 exit(1);
140 fseek(inputFile, -1, SEEK_END);
141 packedFileLen = ftello64(inputFile);
142 if (packedFileLen == -1) {
143 fprintf(stderr, "BuildReversePacked(): Cannot determine file length!\n");
144 exit(1);
148 fread(&lastByteLength, sizeof(unsigned char), 1, inputFile);
149 *textLength = TextLengthFromBytePacked(packedFileLen, BIT_PER_CHAR, lastByteLength);
151 if (ShowProgress) {
152 printf("Packed file size = %llu\n",(unsigned long long) packedFileLen);
153 printf("Text Length = %llu\n",*textLength);
156 unsigned long long byteToProcess = (*textLength+CHAR_PER_BYTE-1) / CHAR_PER_BYTE;
157 packedText = (unsigned char*) malloc(byteToProcess+1);
158 revPackedText = (unsigned char*) malloc(byteToProcess+1);
160 fseek(inputFile, 0, SEEK_SET);
161 fread(packedText, 1, packedFileLen, inputFile);
162 fclose(inputFile);
163 i=byteToProcess-1;
164 j=0;
165 revPackedText[j]=0;
166 k=0;
168 unsigned char allOneChar = (1<<BIT_PER_CHAR) - 1;
169 if (lastByteLength>0) {
170 unsigned char lastByte = packedText[i];
171 lastByte >>= (CHAR_PER_BYTE - lastByteLength)*BIT_PER_CHAR;
172 for (k=0;k<lastByteLength;k++) {
173 revPackedText[j]<<=BIT_PER_CHAR;
174 revPackedText[j]|=(lastByte & allOneChar);
175 lastByte>>=BIT_PER_CHAR;
177 i--;
180 for (;i>=0;i--) {
181 unsigned char lastByte = packedText[i];
182 for (l=0;l<CHAR_PER_BYTE;l++) {
183 revPackedText[j]<<=BIT_PER_CHAR;
184 revPackedText[j]|=(lastByte & allOneChar);
185 k++;
186 if (k>=CHAR_PER_BYTE) {
187 j++;
188 k=0;
189 revPackedText[j]=0;
191 lastByte>>=BIT_PER_CHAR;
195 if (k!=0) {
196 revPackedText[j]<<=(CHAR_PER_BYTE - k)*BIT_PER_CHAR;
199 fwrite(revPackedText,sizeof(unsigned char),byteToProcess,outputFile);
200 if (lastByteLength==0) {
201 fwrite(&lastByteLength,sizeof(unsigned char),1,outputFile);
203 fwrite(&lastByteLength,sizeof(unsigned char),1,outputFile);
205 free(revPackedText);
206 free(packedText);
207 fclose(outputFile);
208 //*/
211 int main(int argc, char** argv) {
214 char c;
215 MMPool *mmPool;
216 dictionary *programInput;
217 double startTime;
218 double elapsedTime = 0, totalElapsedTime = 0;
220 char filename[MAX_FILENAME_LEN+1];
221 BWT *bwt = NULL;
222 BWT *rev_bwt = NULL;
223 HSP *hsp = NULL;
224 unsigned long long textLength = 0;
225 unsigned long long numSeq;
227 BWTInc *bwtInc = NULL;
228 BWTInc *rev_bwtInc = NULL;
230 // Program input
231 programInput = ParseInput(argc, argv);
232 PrintShortDesc();
234 // Ini
235 if (strcmp(argv[0] + strlen(argv[0]) - 4, ".exe") == 0) {
236 *(argv[0] + strlen(argv[0]) - 4) = '\0';
238 sprintf(filename, "%s.ini", argv[0]);
239 ParseIniFile(filename);
240 //printf("\n");
241 ProcessIni();
242 ValidateIni();
243 PrintIni();
245 if (Confirmation == TRUE) {
246 printf("Press Y to go or N to cancel. ");
247 c = (char)getchar();
248 while (c != 'y' && c != 'Y' && c != 'n' && c!= 'N') {
249 c = (char)getchar();
251 if (c == 'n' || c == 'N') {
252 exit(0);
256 startTime = setStartTime();
258 MMMasterInitialize(1, 0, FALSE, NULL);
259 mmPool = MMPoolCreate(PoolSize);
261 // Parse FASTA file to produce packed DNA and annotation file
262 if (ParseFASTA == TRUE) {
264 printf("Parsing FASTA file..\n");
265 numSeq = HSPParseFASTAToPacked(FASTAFileName, AnnotationFileName, PackedDNAFileName, AmbiguityFileName, TranslateFileName, FASTARandomSeed, MaskLowerCase);
267 printf("Finished. Parsed %llu sequences.\n", numSeq);
269 elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
270 printf("Elapsed time = ");
271 printElapsedTime(stdout, FALSE, FALSE, TRUE, 2, elapsedTime);
272 totalElapsedTime += elapsedTime;
273 printf("\n");
275 //Parse packed DNA to construct the packed reversed DNA
277 printf("Parsing FASTA file reverse..\n");
278 unsigned long long textLen;
279 BuildReversePacked(PackedDNAFileName,&textLen,TRUE,1);
280 //printf("Reversed Packed DNA generated..\n");
282 elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
283 printf("Elapsed time = ");
284 printElapsedTime(stdout, FALSE, FALSE, TRUE, 2, elapsedTime);
285 totalElapsedTime += elapsedTime;
286 printf("\n");
290 // Construct BWTInc from text
291 if (BuildBWT == TRUE) {
293 printf("Building BWT..\n");
295 bwtInc = BWTIncConstructFromPacked(mmPool, PackedDNAFileName, ShowProgress,
296 TargetNBit, InitialMaxBuildSize, IncMaxBuildSize);
298 printf("Finished constructing BWT in %u iterations. ", bwtInc->numberOfIterationDone);
300 elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
301 printf("Elapsed time = ");
302 printElapsedTime(stdout, FALSE, FALSE, TRUE, 2, elapsedTime);
303 totalElapsedTime += elapsedTime;
304 printf("\n");
306 printf("Saving BWT..\n");
307 BWTSaveBwtCodeAndOcc(bwtInc->bwt, BWTCodeFileName, BWTOccValueFileName);
308 printf("Finished saving BWT. ");
309 elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
310 printf("Elapsed time = ");
311 printElapsedTime(stdout, FALSE, FALSE, TRUE, 2, elapsedTime);
312 totalElapsedTime += elapsedTime;
313 printf("\n");
315 textLength = bwtInc->bwt->textLength;
318 //Building Reversed BWT
319 printf("Building Reversed BWT..\n");
321 rev_bwtInc = BWTIncConstructFromPacked(mmPool, RevPackedDNAFileName, ShowProgress,
322 TargetNBit, InitialMaxBuildSize, IncMaxBuildSize);
324 printf("Finished constructing Reversed BWT in %u iterations. ", rev_bwtInc->numberOfIterationDone);
326 elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
327 printf("Elapsed time = ");
328 printElapsedTime(stdout, FALSE, FALSE, TRUE, 2, elapsedTime);
329 totalElapsedTime += elapsedTime;
330 printf("\n");
332 printf("Saving BWT..\n");
333 BWTSaveBwtCodeAndOcc(rev_bwtInc->bwt, RevBWTCodeFileName, RevBWTOccValueFileName);
334 printf("Finished saving BWT. ");
335 elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
336 printf("Elapsed time = ");
337 printElapsedTime(stdout, FALSE, FALSE, TRUE, 2, elapsedTime);
338 totalElapsedTime += elapsedTime;
339 printf("\n");
341 textLength = rev_bwtInc->bwt->textLength;
343 BWTIncFree(mmPool, bwtInc);
344 BWTIncFree(mmPool, rev_bwtInc);
347 // Load BWT
348 if (BuildSaValue || BuildSaIndex) {
350 printf("Loading BWT...\n");
352 bwt = BWTLoad(mmPool, BWTCodeFileName, BWTOccValueFileName, NULL, NULL, NULL, NULL);
353 //Use BWT to build the hash table
355 printf("Finished loading BWT. ");
357 elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
358 printf("Elapsed time = ");
359 printElapsedTime(stdout, FALSE, FALSE, TRUE, 2, elapsedTime);
360 totalElapsedTime += elapsedTime;
361 printf("\n");
363 textLength = bwt->textLength;
368 if (BuildSaValue) {
370 printf("Building SA value...\n");
372 if (ShowProgress) {
373 BWTGenerateSaValue(bwt, SaValueFreq, bwt->textLength / SaValueFreq / 10);
374 } else {
375 BWTGenerateSaValue(bwt, SaValueFreq, 0);
377 BWTSaveSaValue(bwt, SaValueFileName);
379 printf("Finished building SA value. ");
381 elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
382 printf("Elapsed time = ");
383 printElapsedTime(stdout, FALSE, FALSE, TRUE, 2, elapsedTime);
384 totalElapsedTime += elapsedTime;
385 printf("\n");
390 /*if (BuildSaIndex) {
392 printf("Building SA index...\n");
394 BWTGenerateCachedSaIndex(bwt, SaIndexNumOfChar, SaIndexFileName);
396 printf("Finished building SA index. ");
398 elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
399 printf("Elapsed time = ");
400 printElapsedTime(stdout, FALSE, FALSE, TRUE, 2, elapsedTime);
401 totalElapsedTime += elapsedTime;
402 printf("\n");
406 // Free BWT
407 if (BuildSaValue || BuildSaIndex) {
408 BWTFree(mmPool, bwt);
412 // Finished all construction tasks
413 printf("Index building is completed.\n");
414 totalElapsedTime = getElapsedTime(startTime);
415 printf("Total elapsed time = ");
416 printElapsedTime(stdout, FALSE, FALSE, TRUE, 2, totalElapsedTime);
417 printf("\n");
419 //MMMasterPrintReport(stdout, FALSE, FALSE, FALSE);
420 if (BuildSaValue) {
421 //fprintf(stdout, "Number of char : %u\n", textLength);
422 //fprintf(stdout, "Bit per char : %.2f\n", (float)MMMasterMaxTotalByteDispatched() * BITS_IN_BYTE / textLength);
423 //printf("\n");
426 MMPoolFree(mmPool);
428 iniparser_freedict(programInput);
430 return 0;
434 dictionary *ParseInput(int argc, char** argv) {
436 dictionary *programInput;
437 char t1[3] = "-c"; // specify that this is a boolean type parameter
438 char t2[3] = "-U"; // specify that this is a boolean type parameter
439 char *d[2];
441 d[0] = t1;
442 d[1] = t2;
444 programInput = paraparser_load(argc, argv, 2, d); // 2 boolean type parameters
446 // Get database name
447 if (!iniparser_find_entry(programInput, "argument:1")) {
448 PrintHelp();
449 exit(1);
451 iniparser_copystring(programInput, "argument:1", DatabaseName, DatabaseName, MAX_FILENAME_LEN);
452 if (strlen(DatabaseName) + 4 > MAX_FILENAME_LEN) {
453 PrintHelp();
454 exit(1);
457 // Get FASTA file name
458 iniparser_copystring(programInput, "argument:2", FASTAFileName, DatabaseName, MAX_FILENAME_LEN);
459 if (strlen(FASTAFileName) > MAX_FILENAME_LEN) {
460 PrintHelp();
461 exit(1);
465 // Whether confirmation is needed
466 Confirmation = iniparser_find_entry(programInput, "parameter:-c");
468 MaskLowerCase = iniparser_find_entry(programInput, "parameter:-U");
470 return programInput;
474 void ParseIniFile(char *iniFileName) {
476 dictionary *ini;
478 //printf("Loading %s ..", iniFileName);
479 ini = iniparser_load(iniFileName, FALSE);
480 if (ini == NULL) {
481 // printf("not found.\n");
482 return;
484 //printf("done.\n");
486 // BuildTasks parameters
487 ParseFASTA = iniparser_getboolean(ini, "BuildTasks:ParseFASTA", ParseFASTA);
488 BuildBWT = iniparser_getboolean(ini, "BuildTasks:BuildBWT", BuildBWT);
489 BuildSaValue = iniparser_getboolean(ini, "BuildTasks:BuildSaValue", BuildSaValue);
490 BuildSaIndex = iniparser_getboolean(ini, "BuildTasks:BuildSaIndex", BuildSaIndex);
492 // Display parameters
493 ShowProgress = iniparser_getboolean(ini, "Display:ShowProgress", ShowProgress);
495 // Parse FASTA parameters
496 FASTARandomSeed = iniparser_getint(ini, "ParseFASTA:RandomSeed", FASTARandomSeed);
497 if (FASTARandomSeed == 0) {
498 FASTARandomSeed = getRandomSeed();
501 // Build BWT parameters
502 OccValueFreq = iniparser_getint(ini, "BuildBWT:OccValueFreq", OccValueFreq);
503 TargetNBit = (float)iniparser_getdouble(ini, "BuildBWT:TargetNBit", TargetNBit);
504 InitialMaxBuildSize = iniparser_getint(ini, "BuildBWT:InitialMaxBuildSize", InitialMaxBuildSize);
505 IncMaxBuildSize = iniparser_getint(ini, "BuildBWT:IncMaxBuildSize", IncMaxBuildSize);
507 // Build SA value parameters
508 SaValueFreq = iniparser_getint(ini, "BuildSAValue:SaValueFreq", SaValueFreq);
510 // Build SA index parameters
511 SaIndexNumOfChar = iniparser_getint(ini, "BuildSAIndex:SaIndexNumOfChar", SaIndexNumOfChar);
513 // Database parameters
514 iniparser_copystring(ini, "Database:AnnotationFileName", AnnotationFileName, AnnotationFileName, MAX_FILENAME_LEN);
515 iniparser_copystring(ini, "Database:AmbiguityFileName", AmbiguityFileName, AmbiguityFileName, MAX_FILENAME_LEN);
516 iniparser_copystring(ini, "Database:TranslateFileName", TranslateFileName, TranslateFileName, MAX_FILENAME_LEN);
517 iniparser_copystring(ini, "Database:PackedDNAFileName", PackedDNAFileName, PackedDNAFileName, MAX_FILENAME_LEN);
518 iniparser_copystring(ini, "Database:BWTCodeFileName", BWTCodeFileName, BWTCodeFileName, MAX_FILENAME_LEN);
519 iniparser_copystring(ini, "Database:BWTOccValueFileName", BWTOccValueFileName, BWTOccValueFileName, MAX_FILENAME_LEN);
520 iniparser_copystring(ini, "Database:SaValueFileName", SaValueFileName, SaValueFileName, MAX_FILENAME_LEN);
521 iniparser_copystring(ini, "Database:SaIndexFileName", SaIndexFileName, SaIndexFileName, MAX_FILENAME_LEN);
523 iniparser_copystring(ini, "Database:RevPackedDNAFileName", RevPackedDNAFileName, RevPackedDNAFileName, MAX_FILENAME_LEN);
524 iniparser_copystring(ini, "Database:RevBWTCodeFileName", RevBWTCodeFileName, RevBWTCodeFileName, MAX_FILENAME_LEN);
525 iniparser_copystring(ini, "Database:RevBWTOccValueFileName", RevBWTOccValueFileName, RevBWTOccValueFileName, MAX_FILENAME_LEN);
527 iniparser_freedict(ini);
531 void ProcessIni() {
533 ProcessFileName(AnnotationFileName, AnnotationFileName, DatabaseName);
534 ProcessFileName(AmbiguityFileName, AmbiguityFileName, DatabaseName);
535 ProcessFileName(TranslateFileName, TranslateFileName, DatabaseName);
536 ProcessFileName(PackedDNAFileName, PackedDNAFileName, DatabaseName);
537 ProcessFileName(RevPackedDNAFileName, RevPackedDNAFileName, DatabaseName);
538 ProcessFileName(BWTCodeFileName, BWTCodeFileName, DatabaseName);
539 ProcessFileName(RevBWTCodeFileName, RevBWTCodeFileName, DatabaseName);
540 ProcessFileName(BWTOccValueFileName, BWTOccValueFileName, DatabaseName);
541 ProcessFileName(RevBWTOccValueFileName, RevBWTOccValueFileName, DatabaseName);
542 ProcessFileName(SaValueFileName, SaValueFileName, DatabaseName);
543 ProcessFileName(SaIndexFileName, SaIndexFileName, DatabaseName);
547 void ValidateIni() {
549 if (!ParseFASTA && !BuildBWT && !BuildSaValue && !BuildSaIndex) {
550 fprintf(stderr, "No action is specified!\n");
551 exit(1);
553 if (ParseFASTA) {
554 if (PackedDNAFileName[0] == '\0') {
555 fprintf(stderr, "Packed DNA file name is not specified!\n");
556 exit(1);
558 if (AnnotationFileName[0] == '\0') {
559 fprintf(stderr, "Annotation file name is not specified!\n");
560 exit(1);
562 if (AmbiguityFileName[0] == '\0') {
563 fprintf(stderr, "Ambiguity file name is not specified!\n");
564 exit(1);
567 if (BuildBWT) {
568 if (PackedDNAFileName[0] == '\0') {
569 fprintf(stderr, "Packed DNA file is not specified!\n");
570 exit(1);
572 if (BWTCodeFileName[0] == '\0') {
573 fprintf(stderr, "BWT code file name is not specified!\n");
574 exit(1);
576 if (BWTOccValueFileName[0] == '\0') {
577 fprintf(stderr, "BWT Occ value file name is not specified!\n");
578 exit(1);
580 if (TargetNBit < 2.5) {
581 fprintf(stderr, "Target NBit should be at least 2.5!\n");
582 exit(1);
585 if (BuildSaValue) {
586 if (BWTCodeFileName[0] == '\0') {
587 fprintf(stderr, "BWT code file is not specified!\n");
588 exit(1);
590 if (BWTOccValueFileName[0] == '\0') {
591 fprintf(stderr, "BWT Occ value file is not specified!\n");
592 exit(1);
594 if (SaValueFileName[0] == '\0') {
595 fprintf(stderr, "SA value file name is not specified!\n");
596 exit(1);
598 if (SaValueFreq <= 0) {
599 fprintf(stderr, "SA value frequency must > 0!\n");
600 exit(1);
604 if (BuildSaIndex) {
605 if (BWTCodeFileName[0] == '\0') {
606 fprintf(stderr, "BWT code file is not specified!\n");
607 exit(1);
609 if (BWTOccValueFileName[0] == '\0') {
610 fprintf(stderr, "BWT Occ value file is not specified!\n");
611 exit(1);
613 if (SaIndexFileName[0] == '\0') {
614 fprintf(stderr, "SA index file name is not specified!\n");
615 exit(1);
617 if (SaIndexNumOfChar <= 0) {
618 fprintf(stderr, "SA index number of character must > 0!\n");
619 exit(1);
621 if (SaIndexNumOfChar > 13) {
622 fprintf(stderr, "SA index number of character must <= 13!\n");
623 exit(1);
/*
 * Print the effective settings.  The report is currently disabled, so this
 * function prints nothing; the code is kept under "#if 0" (with the format
 * specifiers matching the unsigned long long globals) so that it can be
 * re-enabled as a unit.
 */
void PrintIni() {

#if 0
    char boolean[2];

    boolean[0] = 'N';
    boolean[1] = 'Y';

    printf("Parse FASTA file    : %c\n", boolean[ParseFASTA]);
    printf("Build BWT           : %c\n", boolean[BuildBWT]);
    printf("Build SA value      : %c\n", boolean[BuildSaValue]);
    printf("Build SA index      : %c\n", boolean[BuildSaIndex]);
    printf("\n");

    printf("Show progress       : %c\n", boolean[ShowProgress]);
    printf("\n");

    if (ParseFASTA) {
        printf("Parse FASTA :\n");
        printf("Mask lower case         : %c\n", boolean[MaskLowerCase]);
        printf("Random seed             : %llu\n", FASTARandomSeed);
        printf("\n");
    }

    if (BuildBWT) {
        printf("Build BWT :\n");
        printf("Target N Bits           : %.2f\n", TargetNBit);
        printf("Occ value frequency     : %u\n", OccValueFreq);
        printf("Initial Max Build Size  : %llu    Inc Max Build Size : %llu\n",
               InitialMaxBuildSize, IncMaxBuildSize);
        printf("\n");
    }

    if (BuildSaValue) {
        printf("Build SA value :\n");
        printf("SA value frequency      : %u\n", SaValueFreq);
        printf("\n");
    }

    if (BuildSaIndex) {
        printf("Build SA index :\n");
        printf("SA index no. of char    : %u\n", SaIndexNumOfChar);
        printf("\n");
    }

    printf("Annotation file          : %s\n", AnnotationFileName);
    printf("Ambigurity file          : %s\n", AmbiguityFileName);
    printf("Packed DNA file          : %s\n", PackedDNAFileName);
    printf("BWT Code file            : %s\n", BWTCodeFileName);
    printf("BWT Occ value file       : %s\n", BWTOccValueFileName);
    printf("SA value file            : %s\n", SaValueFileName);
    printf("SA index file            : %s\n", SaIndexFileName);
    printf("\n");
    printf("Reversed Packed DNA file          : %s\n", RevPackedDNAFileName);
    printf("Reversed BWT Code file            : %s\n", RevBWTCodeFileName);
    printf("Reversed BWT Occ value file       : %s\n", RevBWTOccValueFileName);
    printf("\n");
#endif

}
/*
 * Print the short program banner.  The banner is currently disabled, so this
 * function prints nothing; the text is kept under "#if 0".
 */
void PrintShortDesc() {

#if 0
    printf("BWTFormatdb v1.0, Copyright (C) 2006, Wong Chi Kwong.\n");
    printf("BWTFormatdb comes with ABSOLUTELY NO WARRENTY.\n");
    printf("BWTFormatdb is free software, and you are welcome to\n");
    printf("redistribute it under certain conditions.\n");
    printf("For details type BWTFormatdb.\n");
    printf("\n");
#endif

}
/*
 * Print the command-line syntax to stdout.  The licence preamble that used to
 * precede it is disabled and kept under "#if 0".
 */
void PrintHelp() {

#if 0
    printf("BWTFormatdb v1.0, Copyright (C) 2006, Wong Chi Kwong.\n");
    printf("\n");

    printf("This program is free software; you can redistribute it and/or\n");
    printf("modify it under the terms of the GNU General Public License\n");
    printf("as published by the Free Software Foundation; either version 2\n");
    printf("of the License, or (at your option) any later version.\n");
    printf("\n");

    printf("This program is distributed in the hope that it will be useful,\n");
    printf("but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
    printf("MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n");
    printf("GNU General Public License for more details.\n");
    printf("\n");

    printf("You should have received a copy of the GNU General Public License\n");
    printf("along with this program; if not, write to the Free Software\n");
    printf("Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.\n");
    printf("\n");
#endif

    printf("Syntax: 2bwt-builder <sequence file>\n");

}
/*
 * Expand a file-name pattern: the first '*' in `inputFileName` is replaced by
 * `databaseName`; a pattern without '*' is copied unchanged.
 *
 * outputFileName : destination; must hold MAX_FILENAME_LEN + 1 bytes.  It may
 *                  be the same buffer as inputFileName (the pattern is copied
 *                  to a local buffer before the destination is written).
 * inputFileName  : pattern, or NULL.  For NULL the destination is set to the
 *                  empty string (unless it is NULL as well) and nothing else
 *                  happens.
 * databaseName   : replacement text for the '*'.
 *
 * Exits with status 1 when strlen(databaseName) + strlen(inputFileName)
 * exceeds MAX_FILENAME_LEN.
 */
void ProcessFileName(char *outputFileName, const char *inputFileName, const char *databaseName) {

    // +1 so that a pattern of exactly MAX_FILENAME_LEN characters stays NUL-terminated.
    char pattern[MAX_FILENAME_LEN + 1];
    size_t patternLen;
    size_t starPos;

    if (inputFileName == NULL) {
        if (outputFileName != inputFileName) {
            outputFileName[0] = '\0';
        }
        return;
    }

    patternLen = strlen(inputFileName);
    if (strlen(databaseName) + patternLen > MAX_FILENAME_LEN) {
        fprintf(stderr, "File length is too long!\n");
        exit(1);
    }

    // patternLen <= MAX_FILENAME_LEN here, so this copy cannot truncate.
    snprintf(pattern, sizeof(pattern), "%s", inputFileName);

    // locate the *
    for (starPos = 0; starPos < patternLen; starPos++) {
        if (pattern[starPos] == '*') {
            break;
        }
    }

    if (starPos < patternLen) {
        // Split the pattern at the '*' and splice the database name in between.
        pattern[starPos] = '\0';
        snprintf(outputFileName, MAX_FILENAME_LEN + 1, "%s%s%s", pattern, databaseName, pattern + starPos + 1);
    } else {
        snprintf(outputFileName, MAX_FILENAME_LEN + 1, "%s", pattern);
    }
}