modified: src1/input.c
[GalaxyCodeBases.git] / BGI / BASE / src / 2bwt / BWTConstruct.h
blob11b3c2d949bcbbd0b408850d40244ab099ea899a
1 /*
3 BWTConstruct.h BWT-Index Construction
5 This module constructs BWT and auxiliary data structures.
7 Copyright (C) 2004, Wong Chi Kwong.
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License
11 as published by the Free Software Foundation; either version 2
12 of the License, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
25 #ifndef __BWTCONSTRUCT_H__
26 #define __BWTCONSTRUCT_H__
28 #include "TypeNLimit.h"
29 #include "BWT.h"
31 #define BWTINC_INSERT_SORT_NUM_ITEM 7
32 #define BWTINC_MIN_MEMORY_IN_WORD 1048576 // 4M RAM
34 typedef struct BWTInc {
35 BWT *bwt;
36 unsigned int numberOfIterationDone;
37 unsigned long long *cumulativeCountInCurrentBuild;
38 unsigned long long availableWord;
39 unsigned long long targetTextLength;
40 float targetNBit;
41 unsigned long long buildSize;
42 unsigned long long initialMaxBuildSize;
43 unsigned long long incMaxBuildSize;
44 unsigned int firstCharInLastIteration;
45 unsigned int *workingMemory;
46 unsigned int *packedText;
47 unsigned char *textBuffer;
48 unsigned int *packedShift;
49 } BWTInc;
51 // This is how the workingMemory is used (initial build)
53 // 1. When ABS Rank is generated and sorting is being done
54 // |-----------Seq-----------|--------ABS Rank--------|---Text----|
55 // 4nbyte 4nbyte 2nbit
56 // 2. When seq are regenerated and BWT is generated
57 // |-----------Seq-----------|--------ABS Rank--------|----BWT----|
58 // 4nbyte 4nbyte 2nbit
61 // This is how the workingMemory is used (incremental build)
63 // 1. When ABS Rank is generated
64 // |--ABS Rank--|----Seq-----|----Text----|----Occ----|----BWT----|
65 // 4nbyte 4nbyte 4nbyte(2nbit)
66 // 2. When sorting is being done
67 // |sorted ARank|----Seq-----|--REL-Rank--|----Occ----|----BWT----|
68 // 4nbyte 4nbyte 4nbyte
69 // 3. When new BWT is built
70 // |sorted ARank|---new BWT--|--REL-Rank--|----Occ----|----BWT----|
71 // 4nbyte 4nbyte 4nbyte
72 // 4. When insertion is being done
73 // |sorted ARank|---new BWT--|---new Occ---|------merged BWT------|
74 // 4nbyte 4nbyte (4nbyte freed for new Occ and merged BWT)
76 // BWT Construction
77 BWTInc *BWTIncCreate(MMPool *mmPool, const unsigned long long textLength, const float targetNBit,
78 const unsigned int initialMaxBuildSize, const unsigned int incMaxBuildSize);
79 void BWTIncFree(MMPool *mmPool, BWTInc *fmiInc);
80 BWTInc *BWTIncConstructFromPacked(MMPool *mmPool, const char *inputFileName, const unsigned int showProgress,
81 const float targetNBit, const unsigned long long initialMaxBuildSize, const unsigned long long incMaxBuildSize);
82 unsigned long long BWTGenerateOccValueToFileFromBwt(const char *bwtFileName, const char *occValueFileName, unsigned long long* decodeTable);
83 void BWTGenerateOccValueFromBwt(const unsigned int* bwt, unsigned int* __restrict occValue, unsigned long long* __restrict occValueMajor,
84 const unsigned long long textLength, const unsigned long long* decodeTable);
86 // Auxliary data structures
87 void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *occValueFileName);
88 void BWTGenerateSaValue(BWT *bwt, const unsigned long long saValueFreq, unsigned int showProgressInterval);
89 void BWTGenerateFullSaValue(BWT *bwt);
90 void BWTSaveSaValue(const BWT *bwt, const char *saValueFileName);
91 void BWTGenerateInverseSa(BWT *bwt, const unsigned long long inverseSaFreq, unsigned int showProgressInterval);
92 void BWTSaveInverseSa(const BWT *bwt, const char *inverseSaFileName);
93 void BWTGenerateCachedSaIndex(const BWT *bwt, const unsigned int numOfChar, const char *cachedSaIndexFileName);
94 void BWTGenerateSaBitmap(const BWT *bwt, const unsigned int numOfChar, const char *saBitmapFileName);
95 void BWTGenerateCompressedSaBitmap(const BWT *bwt, const unsigned int numOfChar, const char *saBitmapFileName);
96 void BWTCountPattern(const BWT *bwt, const unsigned int numOfChar);
98 // verification functions
99 void BWTPrintAndVerifyOccValue(const BWT *bwt, FILE *output);
100 void BWTPrintAndVerifySaValue(const BWT *bwt, FILE *output);
101 void BWTPrintAndVerifyInverseSa(const BWT *bwt, FILE *output);
107 #endif