3 2BWT-Builder.c Build index for FASTA database
5 This program builds an index for a FASTA database for use by BWTBlastn.
7 Copyright (C) 2006, Wong Chi Kwong.
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License
11 as published by the Free Software Foundation; either version 2
12 of the License, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
25 Change : Packaging 2BWT library as a separate product.
26 Thus, changing all references to 2bwt lib to subdirectory.
28 Date : 23rd October 2011
30 Change : Fix a rounding error when building reverse packed sequence.
37 #include "TypeNLimit.h"
38 #include "BWTConstruct.h"
39 #include "MiscUtilities.h"
41 #include "TextConverter.h"
42 #include "MemManager.h"
43 #include "iniparser.h"
// Forward declarations for helpers that are defined further down in this file.
48 dictionary
*ParseInput(int argc
, char** argv
);
49 void ParseIniFile(char *iniFileName
);
53 void PrintShortDesc();
56 void ProcessFileName(char *outputFileName
, const char *inputFileName
, const char *databaseName
);
// Buffer for an ini file name (MAX_FILENAME_LEN characters plus terminator).
// NOTE(review): no use of this buffer is visible in this part of the file.
59 char IniFileName
[MAX_FILENAME_LEN
+1];
62 // BuildTasks parameters
// These flags select which build stages main() runs; ParseIniFile() may
// override them from the "BuildTasks" ini section.
63 int ParseFASTA
= TRUE
;
65 int BuildSaValue
= TRUE
;
66 int BuildSaIndex
= FALSE
;
// Size handed to MMPoolCreate() in main().
69 unsigned long long PoolSize
= 2097152; // 2M - fixed; not configurable through ini
// Forwarded to BWTIncConstructFromPacked(); overridable via "Display:ShowProgress".
72 int ShowProgress
= FALSE
;
74 // Database parameters
// FASTAFileName and DatabaseName are filled from the command line by ParseInput().
75 char FASTAFileName
[MAX_FILENAME_LEN
+1] = "";
76 char DatabaseName
[MAX_FILENAME_LEN
+1] = "";
// Default names of the generated index files. ProcessFileName() searches each
// name for a '*' when combining it with DatabaseName; the defaults can be
// overridden from the "Database" ini section in ParseIniFile().
77 char AnnotationFileName
[MAX_FILENAME_LEN
+1] = "*.index.ann";
78 char AmbiguityFileName
[MAX_FILENAME_LEN
+1] = "*.index.amb";
79 char TranslateFileName
[MAX_FILENAME_LEN
+1] = "*.index.tra";
80 char PackedDNAFileName
[MAX_FILENAME_LEN
+1] = "*.index.pac";
81 char BWTCodeFileName
[MAX_FILENAME_LEN
+1] = "*.index.bwt";
82 char BWTOccValueFileName
[MAX_FILENAME_LEN
+1] = "*.index.fmv";
83 char SaValueFileName
[MAX_FILENAME_LEN
+1] = "*.index.sa";
84 char SaIndexFileName
[MAX_FILENAME_LEN
+1] = "*.index.sai";
// Counterparts of the files above for the reversed text.
86 char RevPackedDNAFileName
[MAX_FILENAME_LEN
+1] = "*.index.rev.pac";
87 char RevBWTCodeFileName
[MAX_FILENAME_LEN
+1] = "*.index.rev.bwt";
88 char RevBWTOccValueFileName
[MAX_FILENAME_LEN
+1] = "*.index.rev.fmv";
90 // Parse FASTA parameters
// A seed of 0 is replaced by getRandomSeed() in ParseIniFile().
91 unsigned long long FASTARandomSeed
= 0;
// Set from the "-U" command-line flag in ParseInput().
92 int MaskLowerCase
= FALSE
;
94 // Build BWT parameters
95 unsigned int OccValueFreq
= 256;
// Validation later in the file rejects values below 2.5.
96 float TargetNBit
= 2.5;
97 unsigned long long InitialMaxBuildSize
= 10000000;
98 unsigned long long IncMaxBuildSize
= 10000000;
100 // Build SA value parameters
// Validation later in the file requires this to be greater than 0.
101 unsigned int SaValueFreq
= 8;
103 // Build SA index parameters
// Validation later in the file requires 0 < SaIndexNumOfChar <= 13.
104 unsigned int SaIndexNumOfChar
= 12;
// Debug helper that prints a sequence of binary digits to stdout.
//
// seq : value to print.
// len : number of digits handled by the first loop.
//
// NOTE(review): the declarations of i, j and text[] and the body of the first
// loop are not visible here; presumably that loop fills text[] with the bits of
// seq and sets j - confirm against the full source.
106 void printBinary(unsigned long long seq
,int len
) {
109 for (i
=0;i
<len
;i
++) {
// Print text[j+1 .. 63] as '0'/'1' characters, inserting a space after every
// fourth digit counted from position j+1.
113 for (i
=j
+1;i
<64;i
++) {
114 if (text
[i
]==0) printf("0");
115 if (text
[i
]==1) printf("1");
116 if ((i
-j
-1) % 4 ==3) printf(" ");
// Reads the packed DNA file inputFileName and writes a packed file for the
// reversed text to the global RevPackedDNAFileName.
//
// inputFileName       : packed input file, opened in binary read mode.
// textLength          : out-parameter; receives the value computed by
//                       TextLengthFromBytePacked() from the file length and the
//                       file's final byte.
// convertToWordPacked : not referenced in the lines visible here.
// trailerBufferInWord : not referenced in the lines visible here.
//
// NOTE(review): several lines of this function (local declarations, loop
// headers, closing braces, clean-up) are not visible, so the comments below
// describe only the statements that are shown.
// NOTE(review): in the visible lines only inputFile is compared with NULL; no
// check of outputFile or of the malloc()/fread()/fwrite() results is shown -
// confirm against the full source.
121 void BuildReversePacked(const char *inputFileName
, unsigned long long *textLength
, const unsigned int convertToWordPacked
, const unsigned int trailerBufferInWord
) {
125 unsigned char * packedText
;
126 unsigned char * revPackedText
;
127 off64_t packedFileLen
;
128 unsigned char lastByteLength
;
132 inputFile
= (FILE*)(FILE*)fopen64(inputFileName
, "rb");
133 outputFile
= (FILE*)(FILE*)fopen64(RevPackedDNAFileName
, "wb");
135 if (inputFile
== NULL
) {
136 fprintf(stderr
, "BuildReversePacked() : Cannot open inputFileName!\n");
// Position on the last byte of the file, so the offset reported by ftello64()
// is the file size minus one.
140 fseek(inputFile
, -1, SEEK_END
);
141 packedFileLen
= ftello64(inputFile
);
142 if (packedFileLen
== -1) {
143 fprintf(stderr
, "BuildReversePacked(): Cannot determine file length!\n");
// The final byte of the file is read into lastByteLength.
148 fread(&lastByteLength
, sizeof(unsigned char), 1, inputFile
);
149 *textLength
= TextLengthFromBytePacked(packedFileLen
, BIT_PER_CHAR
, lastByteLength
);
152 printf("Packed file size = %llu\n",(unsigned long long) packedFileLen
);
153 printf("Text Length = %llu\n",*textLength
);
// Number of bytes needed to hold textLength characters, rounded up.
156 unsigned long long byteToProcess
= (*textLength
+CHAR_PER_BYTE
-1) / CHAR_PER_BYTE
;
157 packedText
= (unsigned char*) malloc(byteToProcess
+1);
158 revPackedText
= (unsigned char*) malloc(byteToProcess
+1);
// Rewind and load packedFileLen bytes of the input into packedText.
160 fseek(inputFile
, 0, SEEK_SET
);
161 fread(packedText
, 1, packedFileLen
, inputFile
);
// Mask with the low BIT_PER_CHAR bits set, used to extract one character.
168 unsigned char allOneChar
= (1<<BIT_PER_CHAR
) - 1;
// A partially filled last byte is handled first: its unused low bits are
// shifted out, then its lastByteLength characters are moved one at a time
// into revPackedText[j], lowest character first.
169 if (lastByteLength
>0) {
170 unsigned char lastByte
= packedText
[i
];
171 lastByte
>>= (CHAR_PER_BYTE
- lastByteLength
)*BIT_PER_CHAR
;
172 for (k
=0;k
<lastByteLength
;k
++) {
173 revPackedText
[j
]<<=BIT_PER_CHAR
;
174 revPackedText
[j
]|=(lastByte
& allOneChar
);
175 lastByte
>>=BIT_PER_CHAR
;
// Full bytes: each of the CHAR_PER_BYTE characters is appended to
// revPackedText[j] in the same way.
// NOTE(review): the enclosing loop and the handling of k reaching
// CHAR_PER_BYTE are not visible here.
181 unsigned char lastByte
= packedText
[i
];
182 for (l
=0;l
<CHAR_PER_BYTE
;l
++) {
183 revPackedText
[j
]<<=BIT_PER_CHAR
;
184 revPackedText
[j
]|=(lastByte
& allOneChar
);
186 if (k
>=CHAR_PER_BYTE
) {
191 lastByte
>>=BIT_PER_CHAR
;
// Left-align the characters collected in the current output byte.
196 revPackedText
[j
]<<=(CHAR_PER_BYTE
- k
)*BIT_PER_CHAR
;
// Write byteToProcess bytes of reversed data followed by the length byte.
// When lastByteLength is 0 an additional lastByteLength byte is written first.
// NOTE(review): whether the second fwrite is unconditional cannot be seen from
// the visible lines - confirm.
199 fwrite(revPackedText
,sizeof(unsigned char),byteToProcess
,outputFile
);
200 if (lastByteLength
==0) {
201 fwrite(&lastByteLength
,sizeof(unsigned char),1,outputFile
);
203 fwrite(&lastByteLength
,sizeof(unsigned char),1,outputFile
);
// Program entry point: drives the index build.
//
// Visible sequence of work:
//   1. ParseInput() reads the command line; ParseIniFile() reads "<argv[0]>.ini".
//   2. If Confirmation is TRUE the user is prompted for Y/N.
//   3. The memory manager is initialised and a pool of PoolSize is created.
//   4. If ParseFASTA is TRUE, HSPParseFASTAToPacked() produces the packed files
//      and BuildReversePacked() is called on PackedDNAFileName.
//   5. If BuildBWT is TRUE, the forward and reversed BWT are constructed with
//      BWTIncConstructFromPacked() and saved with BWTSaveBwtCodeAndOcc().
//   6. If BuildSaValue or BuildSaIndex is set, the BWT is loaded with BWTLoad(),
//      SA values are generated and saved, and the BWT is freed.
//   7. The total elapsed time is printed and the input dictionary is freed.
// After each stage the elapsed time since the previous stage is printed.
//
// NOTE(review): many lines (local declarations, closing braces, some
// conditions) are not visible here, so exact block nesting cannot be confirmed.
211 int main(int argc
, char** argv
) {
216 dictionary
*programInput
;
218 double elapsedTime
= 0, totalElapsedTime
= 0;
220 char filename
[MAX_FILENAME_LEN
+1];
224 unsigned long long textLength
= 0;
225 unsigned long long numSeq
;
227 BWTInc
*bwtInc
= NULL
;
228 BWTInc
*rev_bwtInc
= NULL
;
231 programInput
= ParseInput(argc
, argv
);
// Build the ini file name from argv[0], dropping a trailing ".exe" if present.
// NOTE(review): argv[0] + strlen(argv[0]) - 4 is formed without a visible
// length check, so it points before the string when argv[0] is shorter than
// four characters; the sprintf into filename is also unbounded - confirm.
235 if (strcmp(argv
[0] + strlen(argv
[0]) - 4, ".exe") == 0) {
236 *(argv
[0] + strlen(argv
[0]) - 4) = '\0';
238 sprintf(filename
, "%s.ini", argv
[0]);
239 ParseIniFile(filename
);
// Optional interactive confirmation; the loop repeats until c is one of y/Y/n/N.
245 if (Confirmation
== TRUE
) {
246 printf("Press Y to go or N to cancel. ");
248 while (c
!= 'y' && c
!= 'Y' && c
!= 'n' && c
!= 'N') {
251 if (c
== 'n' || c
== 'N') {
256 startTime
= setStartTime();
// mmPool is passed to the BWT construction, load and free calls below.
258 MMMasterInitialize(1, 0, FALSE
, NULL
);
259 mmPool
= MMPoolCreate(PoolSize
);
261 // Parse FASTA file to produce packed DNA and annotation file
262 if (ParseFASTA
== TRUE
) {
264 printf("Parsing FASTA file..\n");
265 numSeq
= HSPParseFASTAToPacked(FASTAFileName
, AnnotationFileName
, PackedDNAFileName
, AmbiguityFileName
, TranslateFileName
, FASTARandomSeed
, MaskLowerCase
);
267 printf("Finished. Parsed %llu sequences.\n", numSeq
);
// elapsedTime is the time spent in the stage just finished; totalElapsedTime
// accumulates the time of all finished stages.
269 elapsedTime
= getElapsedTime(startTime
) - totalElapsedTime
;
270 printf("Elapsed time = ");
271 printElapsedTime(stdout
, FALSE
, FALSE
, TRUE
, 2, elapsedTime
);
272 totalElapsedTime
+= elapsedTime
;
275 //Parse packed DNA to construct the packed reversed DNA
277 printf("Parsing FASTA file reverse..\n");
// textLen receives the text length from BuildReversePacked(); no later use of
// it is visible in this function.
278 unsigned long long textLen
;
279 BuildReversePacked(PackedDNAFileName
,&textLen
,TRUE
,1);
280 //printf("Reversed Packed DNA generated..\n");
282 elapsedTime
= getElapsedTime(startTime
) - totalElapsedTime
;
283 printf("Elapsed time = ");
284 printElapsedTime(stdout
, FALSE
, FALSE
, TRUE
, 2, elapsedTime
);
285 totalElapsedTime
+= elapsedTime
;
290 // Construct BWTInc from text
291 if (BuildBWT
== TRUE
) {
293 printf("Building BWT..\n");
295 bwtInc
= BWTIncConstructFromPacked(mmPool
, PackedDNAFileName
, ShowProgress
,
296 TargetNBit
, InitialMaxBuildSize
, IncMaxBuildSize
);
298 printf("Finished constructing BWT in %u iterations. ", bwtInc
->numberOfIterationDone
);
300 elapsedTime
= getElapsedTime(startTime
) - totalElapsedTime
;
301 printf("Elapsed time = ");
302 printElapsedTime(stdout
, FALSE
, FALSE
, TRUE
, 2, elapsedTime
);
303 totalElapsedTime
+= elapsedTime
;
306 printf("Saving BWT..\n");
307 BWTSaveBwtCodeAndOcc(bwtInc
->bwt
, BWTCodeFileName
, BWTOccValueFileName
);
308 printf("Finished saving BWT. ");
309 elapsedTime
= getElapsedTime(startTime
) - totalElapsedTime
;
310 printf("Elapsed time = ");
311 printElapsedTime(stdout
, FALSE
, FALSE
, TRUE
, 2, elapsedTime
);
312 totalElapsedTime
+= elapsedTime
;
315 textLength
= bwtInc
->bwt
->textLength
;
318 //Building Reversed BWT
319 printf("Building Reversed BWT..\n");
321 rev_bwtInc
= BWTIncConstructFromPacked(mmPool
, RevPackedDNAFileName
, ShowProgress
,
322 TargetNBit
, InitialMaxBuildSize
, IncMaxBuildSize
);
324 printf("Finished constructing Reversed BWT in %u iterations. ", rev_bwtInc
->numberOfIterationDone
);
326 elapsedTime
= getElapsedTime(startTime
) - totalElapsedTime
;
327 printf("Elapsed time = ");
328 printElapsedTime(stdout
, FALSE
, FALSE
, TRUE
, 2, elapsedTime
);
329 totalElapsedTime
+= elapsedTime
;
332 printf("Saving BWT..\n");
333 BWTSaveBwtCodeAndOcc(rev_bwtInc
->bwt
, RevBWTCodeFileName
, RevBWTOccValueFileName
);
334 printf("Finished saving BWT. ");
335 elapsedTime
= getElapsedTime(startTime
) - totalElapsedTime
;
336 printf("Elapsed time = ");
337 printElapsedTime(stdout
, FALSE
, FALSE
, TRUE
, 2, elapsedTime
);
338 totalElapsedTime
+= elapsedTime
;
341 textLength
= rev_bwtInc
->bwt
->textLength
;
// Both incremental BWT structures are released before the SA stage.
343 BWTIncFree(mmPool
, bwtInc
);
344 BWTIncFree(mmPool
, rev_bwtInc
);
// The SA stage works from the saved forward BWT files, loaded back with BWTLoad().
348 if (BuildSaValue
|| BuildSaIndex
) {
350 printf("Loading BWT...\n");
352 bwt
= BWTLoad(mmPool
, BWTCodeFileName
, BWTOccValueFileName
, NULL
, NULL
, NULL
, NULL
);
353 //Use BWT to build the hash table
355 printf("Finished loading BWT. ");
357 elapsedTime
= getElapsedTime(startTime
) - totalElapsedTime
;
358 printf("Elapsed time = ");
359 printElapsedTime(stdout
, FALSE
, FALSE
, TRUE
, 2, elapsedTime
);
360 totalElapsedTime
+= elapsedTime
;
363 textLength
= bwt
->textLength
;
370 printf("Building SA value...\n");
// NOTE(review): two BWTGenerateSaValue() calls are visible, differing only in
// the last argument; the lines that select between them are not shown.
373 BWTGenerateSaValue(bwt
, SaValueFreq
, bwt
->textLength
/ SaValueFreq
/ 10);
375 BWTGenerateSaValue(bwt
, SaValueFreq
, 0);
377 BWTSaveSaValue(bwt
, SaValueFileName
);
379 printf("Finished building SA value. ");
381 elapsedTime
= getElapsedTime(startTime
) - totalElapsedTime
;
382 printf("Elapsed time = ");
383 printElapsedTime(stdout
, FALSE
, FALSE
, TRUE
, 2, elapsedTime
);
384 totalElapsedTime
+= elapsedTime
;
390 /*if (BuildSaIndex) {
392 printf("Building SA index...\n");
394 BWTGenerateCachedSaIndex(bwt, SaIndexNumOfChar, SaIndexFileName);
396 printf("Finished building SA index. ");
398 elapsedTime = getElapsedTime(startTime) - totalElapsedTime;
399 printf("Elapsed time = ");
400 printElapsedTime(stdout, FALSE, FALSE, TRUE, 2, elapsedTime);
401 totalElapsedTime += elapsedTime;
407 if (BuildSaValue
|| BuildSaIndex
) {
408 BWTFree(mmPool
, bwt
);
412 // Finished all construction tasks
413 printf("Index building is completed.\n");
414 totalElapsedTime
= getElapsedTime(startTime
);
415 printf("Total elapsed time = ");
416 printElapsedTime(stdout
, FALSE
, FALSE
, TRUE
, 2, totalElapsedTime
);
419 //MMMasterPrintReport(stdout, FALSE, FALSE, FALSE);
421 //fprintf(stdout, "Number of char : %u\n", textLength);
422 //fprintf(stdout, "Bit per char : %.2f\n", (float)MMMasterMaxTotalByteDispatched() * BITS_IN_BYTE / textLength);
428 iniparser_freedict(programInput
);
// Parses the command line into a dictionary and fills the related globals.
//
// argc, argv : the arguments received by main().
//
// Visible effects:
//   - DatabaseName is copied from "argument:1" (its current value is the default).
//   - FASTAFileName is copied from "argument:2", defaulting to DatabaseName.
//   - Confirmation is set from the presence of "parameter:-c".
//   - MaskLowerCase is set from the presence of "parameter:-U".
//
// NOTE(review): the return statement and the bodies of the error branches are
// not visible here; main() assigns the result to programInput and later frees
// it, so presumably the loaded dictionary is returned - confirm.
434 dictionary
*ParseInput(int argc
, char** argv
) {
436 dictionary
*programInput
;
437 char t1
[3] = "-c"; // specify that this is a boolean type parameter
438 char t2
[3] = "-U"; // specify that this is a boolean type parameter
// NOTE(review): the declaration of d is not visible; presumably it lists the
// boolean flags t1 and t2 - confirm.
444 programInput
= paraparser_load(argc
, argv
, 2, d
); // 2 boolean type parameters
// The first positional argument is looked up explicitly; the handling when it
// is absent is on lines not visible here.
447 if (!iniparser_find_entry(programInput
, "argument:1")) {
451 iniparser_copystring(programInput
, "argument:1", DatabaseName
, DatabaseName
, MAX_FILENAME_LEN
);
// Four extra characters are counted on top of the database name in this check.
452 if (strlen(DatabaseName
) + 4 > MAX_FILENAME_LEN
) {
457 // Get FASTA file name
458 iniparser_copystring(programInput
, "argument:2", FASTAFileName
, DatabaseName
, MAX_FILENAME_LEN
);
459 if (strlen(FASTAFileName
) > MAX_FILENAME_LEN
) {
465 // Whether confirmation is needed
466 Confirmation
= iniparser_find_entry(programInput
, "parameter:-c");
468 MaskLowerCase
= iniparser_find_entry(programInput
, "parameter:-U");
// Loads settings from the ini file iniFileName into the global configuration
// variables, then frees the ini dictionary.
//
// iniFileName : path of the ini file passed to iniparser_load().
//
// Every setting is read with the variable's current value as the default
// argument, and a FASTARandomSeed of 0 is replaced by getRandomSeed().
//
// NOTE(review): the declaration of ini and the handling of a failed load are
// not visible here.
// NOTE(review): FASTARandomSeed, InitialMaxBuildSize and IncMaxBuildSize are
// unsigned long long but are read through iniparser_getint(); if that function
// works on int, larger values cannot be configured - confirm.
474 void ParseIniFile(char *iniFileName
) {
478 //printf("Loading %s ..", iniFileName);
479 ini
= iniparser_load(iniFileName
, FALSE
);
481 // printf("not found.\n");
486 // BuildTasks parameters
487 ParseFASTA
= iniparser_getboolean(ini
, "BuildTasks:ParseFASTA", ParseFASTA
);
488 BuildBWT
= iniparser_getboolean(ini
, "BuildTasks:BuildBWT", BuildBWT
);
489 BuildSaValue
= iniparser_getboolean(ini
, "BuildTasks:BuildSaValue", BuildSaValue
);
490 BuildSaIndex
= iniparser_getboolean(ini
, "BuildTasks:BuildSaIndex", BuildSaIndex
);
492 // Display parameters
493 ShowProgress
= iniparser_getboolean(ini
, "Display:ShowProgress", ShowProgress
);
495 // Parse FASTA parameters
496 FASTARandomSeed
= iniparser_getint(ini
, "ParseFASTA:RandomSeed", FASTARandomSeed
);
// A seed of 0 means "not set": pick one with getRandomSeed().
497 if (FASTARandomSeed
== 0) {
498 FASTARandomSeed
= getRandomSeed();
501 // Build BWT parameters
502 OccValueFreq
= iniparser_getint(ini
, "BuildBWT:OccValueFreq", OccValueFreq
);
503 TargetNBit
= (float)iniparser_getdouble(ini
, "BuildBWT:TargetNBit", TargetNBit
);
504 InitialMaxBuildSize
= iniparser_getint(ini
, "BuildBWT:InitialMaxBuildSize", InitialMaxBuildSize
);
505 IncMaxBuildSize
= iniparser_getint(ini
, "BuildBWT:IncMaxBuildSize", IncMaxBuildSize
);
507 // Build SA value parameters
508 SaValueFreq
= iniparser_getint(ini
, "BuildSAValue:SaValueFreq", SaValueFreq
);
510 // Build SA index parameters
511 SaIndexNumOfChar
= iniparser_getint(ini
, "BuildSAIndex:SaIndexNumOfChar", SaIndexNumOfChar
);
513 // Database parameters
// Each file name is copied in place: the destination buffer is also passed as
// the default value.
514 iniparser_copystring(ini
, "Database:AnnotationFileName", AnnotationFileName
, AnnotationFileName
, MAX_FILENAME_LEN
);
515 iniparser_copystring(ini
, "Database:AmbiguityFileName", AmbiguityFileName
, AmbiguityFileName
, MAX_FILENAME_LEN
);
516 iniparser_copystring(ini
, "Database:TranslateFileName", TranslateFileName
, TranslateFileName
, MAX_FILENAME_LEN
);
517 iniparser_copystring(ini
, "Database:PackedDNAFileName", PackedDNAFileName
, PackedDNAFileName
, MAX_FILENAME_LEN
);
518 iniparser_copystring(ini
, "Database:BWTCodeFileName", BWTCodeFileName
, BWTCodeFileName
, MAX_FILENAME_LEN
);
519 iniparser_copystring(ini
, "Database:BWTOccValueFileName", BWTOccValueFileName
, BWTOccValueFileName
, MAX_FILENAME_LEN
);
520 iniparser_copystring(ini
, "Database:SaValueFileName", SaValueFileName
, SaValueFileName
, MAX_FILENAME_LEN
);
521 iniparser_copystring(ini
, "Database:SaIndexFileName", SaIndexFileName
, SaIndexFileName
, MAX_FILENAME_LEN
);
523 iniparser_copystring(ini
, "Database:RevPackedDNAFileName", RevPackedDNAFileName
, RevPackedDNAFileName
, MAX_FILENAME_LEN
);
524 iniparser_copystring(ini
, "Database:RevBWTCodeFileName", RevBWTCodeFileName
, RevBWTCodeFileName
, MAX_FILENAME_LEN
);
525 iniparser_copystring(ini
, "Database:RevBWTOccValueFileName", RevBWTOccValueFileName
, RevBWTOccValueFileName
, MAX_FILENAME_LEN
);
527 iniparser_freedict(ini
);
533 ProcessFileName(AnnotationFileName
, AnnotationFileName
, DatabaseName
);
534 ProcessFileName(AmbiguityFileName
, AmbiguityFileName
, DatabaseName
);
535 ProcessFileName(TranslateFileName
, TranslateFileName
, DatabaseName
);
536 ProcessFileName(PackedDNAFileName
, PackedDNAFileName
, DatabaseName
);
537 ProcessFileName(RevPackedDNAFileName
, RevPackedDNAFileName
, DatabaseName
);
538 ProcessFileName(BWTCodeFileName
, BWTCodeFileName
, DatabaseName
);
539 ProcessFileName(RevBWTCodeFileName
, RevBWTCodeFileName
, DatabaseName
);
540 ProcessFileName(BWTOccValueFileName
, BWTOccValueFileName
, DatabaseName
);
541 ProcessFileName(RevBWTOccValueFileName
, RevBWTOccValueFileName
, DatabaseName
);
542 ProcessFileName(SaValueFileName
, SaValueFileName
, DatabaseName
);
543 ProcessFileName(SaIndexFileName
, SaIndexFileName
, DatabaseName
);
549 if (!ParseFASTA
&& !BuildBWT
&& !BuildSaValue
&& !BuildSaIndex
) {
550 fprintf(stderr
, "No action is specified!\n");
554 if (PackedDNAFileName
[0] == '\0') {
555 fprintf(stderr
, "Packed DNA file name is not specified!\n");
558 if (AnnotationFileName
[0] == '\0') {
559 fprintf(stderr
, "Annotation file name is not specified!\n");
562 if (AmbiguityFileName
[0] == '\0') {
563 fprintf(stderr
, "Ambiguity file name is not specified!\n");
568 if (PackedDNAFileName
[0] == '\0') {
569 fprintf(stderr
, "Packed DNA file is not specified!\n");
572 if (BWTCodeFileName
[0] == '\0') {
573 fprintf(stderr
, "BWT code file name is not specified!\n");
576 if (BWTOccValueFileName
[0] == '\0') {
577 fprintf(stderr
, "BWT Occ value file name is not specified!\n");
580 if (TargetNBit
< 2.5) {
581 fprintf(stderr
, "Target NBit should be at least 2.5!\n");
586 if (BWTCodeFileName
[0] == '\0') {
587 fprintf(stderr
, "BWT code file is not specified!\n");
590 if (BWTOccValueFileName
[0] == '\0') {
591 fprintf(stderr
, "BWT Occ value file is not specified!\n");
594 if (SaValueFileName
[0] == '\0') {
595 fprintf(stderr
, "SA value file name is not specified!\n");
598 if (SaValueFreq
<= 0) {
599 fprintf(stderr
, "SA value frequency must > 0!\n");
605 if (BWTCodeFileName
[0] == '\0') {
606 fprintf(stderr
, "BWT code file is not specified!\n");
609 if (BWTOccValueFileName
[0] == '\0') {
610 fprintf(stderr
, "BWT Occ value file is not specified!\n");
613 if (SaIndexFileName
[0] == '\0') {
614 fprintf(stderr
, "SA index file name is not specified!\n");
617 if (SaIndexNumOfChar
<= 0) {
618 fprintf(stderr
, "SA index number of character must > 0!\n");
621 if (SaIndexNumOfChar
> 13) {
622 fprintf(stderr
, "SA index number of character must <= 13!\n");
636 /*printf("Parse FASTA file : %c\n", boolean[ParseFASTA]);
637 printf("Build BWT : %c\n", boolean[BuildBWT]);
638 printf("Build SA value : %c\n", boolean[BuildSaValue]);
639 printf("Build SA index : %c\n", boolean[BuildSaIndex]);
642 printf("Show progress : %c\n", boolean[ShowProgress]);
646 printf("Parse FASTA :\n");
647 printf("Mask lower case : %c\n", boolean[MaskLowerCase]);
648 printf("Random seed : %u\n", FASTARandomSeed);
653 printf("Build BWT :\n");
654 printf("Target N Bits : %.2f\n", TargetNBit);
655 printf("Occ value frequency : %u\n", OccValueFreq);
656 printf("Initial Max Build Size : %u Inc Max Build Size : %u\n",
657 InitialMaxBuildSize, IncMaxBuildSize);
662 printf("Build SA value :\n");
663 printf("SA value frequency : %u\n", SaValueFreq);
668 printf("Build SA index :\n");
669 printf("SA index no. of char : %u\n", SaIndexNumOfChar);
673 printf("Annotation file : %s\n", AnnotationFileName);
674 printf("Ambigurity file : %s\n", AmbiguityFileName);
675 printf("Packed DNA file : %s\n", PackedDNAFileName);
676 printf("BWT Code file : %s\n", BWTCodeFileName);
677 printf("BWT Occ value file : %s\n", BWTOccValueFileName);
678 printf("SA value file : %s\n", SaValueFileName);
679 printf("SA index file : %s\n", SaIndexFileName);
681 printf("Reversed Packed DNA file : %s\n", RevPackedDNAFileName);
682 printf("Reversed BWT Code file : %s\n", RevBWTCodeFileName);
683 printf("Reversed BWT Occ value file : %s\n", RevBWTOccValueFileName);
// Prints a short program banner.
// NOTE(review): in the lines visible here the whole body is commented out, so
// the function appears to print nothing; the end of the comment and the
// closing brace are not shown - confirm.
688 void PrintShortDesc() {
690 /*printf("BWTFormatdb v1.0, Copyright (C) 2006, Wong Chi Kwong.\n");
691 printf("BWTFormatdb comes with ABSOLUTELY NO WARRENTY.\n");
692 printf("BWTFormatdb is free software, and you are welcome to\n");
693 printf("redistribute it under certain conditions.\n");
694 printf("For details type BWTFormatdb.\n");
701 /*printf("BWTFormatdb v1.0, Copyright (C) 2006, Wong Chi Kwong.\n");
704 printf("This program is free software; you can redistribute it and/or\n");
705 printf("modify it under the terms of the GNU General Public License\n");
706 printf("as published by the Free Software Foundation; either version 2\n");
707 printf("of the License, or (at your option) any later version.\n");
710 printf("This program is distributed in the hope that it will be useful,\n");
711 printf("but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
712 printf("MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n");
713 printf("GNU General Public License for more details.\n");
716 printf("You should have received a copy of the GNU General Public License\n");
717 printf("along with this program; if not, write to the Free Software\n");
718 printf("Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.\n");
721 printf("Syntax: 2bwt-builder <sequence file>\n");
// Builds a concrete file name from a pattern and the database name.
//
// outputFileName : destination buffer; callers in this file pass the same
//                  buffer as inputFileName.
// inputFileName  : file-name pattern that may contain a '*'; may be NULL.
// databaseName   : text written in place of the '*'.
//
// Visible behaviour:
//   - NULL inputFileName: outputFileName is set to the empty string (unless it
//     is also NULL).
//   - Combined length above MAX_FILENAME_LEN: "File length is too long!" is
//     printed to stderr.
//   - Otherwise the pattern is copied to a temporary buffer and searched for
//     the first '*'. If one is found, the output is the text before it,
//     databaseName, and the text after it; if not, the output is the pattern.
//
// NOTE(review): the statements that end the search loop and cut tempChar at
// the '*' are not visible here; the first "%s" only yields the prefix if
// tempChar[i] is overwritten with '\0' - confirm.
// NOTE(review): tempChar holds MAX_FILENAME_LEN bytes and strncpy() copies up
// to MAX_FILENAME_LEN bytes without guaranteeing a terminator; the length check
// above still admits an inputFileName of exactly MAX_FILENAME_LEN characters
// when databaseName is empty - confirm whether that case can occur.
725 void ProcessFileName(char *outputFileName
, const char *inputFileName
, const char *databaseName
) {
727 char tempChar
[MAX_FILENAME_LEN
];
728 unsigned long long i
;
730 if (inputFileName
== NULL
) {
731 if (outputFileName
!= inputFileName
) {
732 outputFileName
[0] = '\0';
737 if (strlen(databaseName
) + strlen(inputFileName
) > MAX_FILENAME_LEN
) {
738 fprintf(stderr
, "File length is too long!\n");
// Work on a copy so that outputFileName may alias inputFileName.
742 strncpy(tempChar
, inputFileName
, MAX_FILENAME_LEN
);
// Look for the '*' placeholder.
745 for (i
=0; i
<MAX_FILENAME_LEN
; i
++) {
746 if (tempChar
[i
] == '*') {
// i < MAX_FILENAME_LEN means a '*' was found at index i.
750 if (i
<MAX_FILENAME_LEN
) {
752 sprintf(outputFileName
, "%s%s%s", tempChar
, databaseName
, tempChar
+ i
+ 1);
754 sprintf(outputFileName
, "%s", tempChar
);