3 BWTConstruct.h BWT-Index Construction
5 This module constructs BWT and auxiliary data structures.
7 Copyright (C) 2004, Wong Chi Kwong.
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License
11 as published by the Free Software Foundation; either version 2
12 of the License, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
25 #ifndef __BWTCONSTRUCT_H__
26 #define __BWTCONSTRUCT_H__
28 #include "TypeNLimit.h"
31 #define BWTINC_INSERT_SORT_NUM_ITEM 7
32 #define BWTINC_MIN_MEMORY_IN_WORD 1048576 // 4M RAM
34 typedef struct BWTInc
{
36 unsigned int numberOfIterationDone
;
37 unsigned long long *cumulativeCountInCurrentBuild
;
38 unsigned long long availableWord
;
39 unsigned long long targetTextLength
;
41 unsigned long long buildSize
;
42 unsigned long long initialMaxBuildSize
;
43 unsigned long long incMaxBuildSize
;
44 unsigned int firstCharInLastIteration
;
45 unsigned int *workingMemory
;
46 unsigned int *packedText
;
47 unsigned char *textBuffer
;
48 unsigned int *packedShift
;
51 // This is how the workingMemory is used (initial build)
53 // 1. When ABS Rank is generated and sorting is being done
54 // |-----------Seq-----------|--------ABS Rank--------|---Text----|
55 // 4nbyte 4nbyte 2nbit
56 // 2. When seq are regenerated and BWT is generated
57 // |-----------Seq-----------|--------ABS Rank--------|----BWT----|
58 // 4nbyte 4nbyte 2nbit
61 // This is how the workingMemory is used (incremental build)
63 // 1. When ABS Rank is generated
64 // |--ABS Rank--|----Seq-----|----Text----|----Occ----|----BWT----|
65 // 4nbyte 4nbyte 4nbyte(2nbit)
66 // 2. When sorting is being done
67 // |sorted ARank|----Seq-----|--REL-Rank--|----Occ----|----BWT----|
68 // 4nbyte 4nbyte 4nbyte
69 // 3. When new BWT is built
70 // |sorted ARank|---new BWT--|--REL-Rank--|----Occ----|----BWT----|
71 // 4nbyte 4nbyte 4nbyte
72 // 4. When insertion is being done
73 // |sorted ARank|---new BWT--|---new Occ---|------merged BWT------|
74 // 4nbyte 4nbyte (4nbyte freed for new Occ and merged BWT)
77 BWTInc
*BWTIncCreate(MMPool
*mmPool
, const unsigned long long textLength
, const float targetNBit
,
78 const unsigned int initialMaxBuildSize
, const unsigned int incMaxBuildSize
);
79 void BWTIncFree(MMPool
*mmPool
, BWTInc
*fmiInc
);
80 BWTInc
*BWTIncConstructFromPacked(MMPool
*mmPool
, const char *inputFileName
, const unsigned int showProgress
,
81 const float targetNBit
, const unsigned long long initialMaxBuildSize
, const unsigned long long incMaxBuildSize
);
82 unsigned long long BWTGenerateOccValueToFileFromBwt(const char *bwtFileName
, const char *occValueFileName
, unsigned long long* decodeTable
);
83 void BWTGenerateOccValueFromBwt(const unsigned int* bwt
, unsigned int* __restrict occValue
, unsigned long long* __restrict occValueMajor
,
84 const unsigned long long textLength
, const unsigned long long* decodeTable
);
86 // Auxliary data structures
87 void BWTSaveBwtCodeAndOcc(const BWT
*bwt
, const char *bwtFileName
, const char *occValueFileName
);
88 void BWTGenerateSaValue(BWT
*bwt
, const unsigned long long saValueFreq
, unsigned int showProgressInterval
);
89 void BWTGenerateFullSaValue(BWT
*bwt
);
90 void BWTSaveSaValue(const BWT
*bwt
, const char *saValueFileName
);
91 void BWTGenerateInverseSa(BWT
*bwt
, const unsigned long long inverseSaFreq
, unsigned int showProgressInterval
);
92 void BWTSaveInverseSa(const BWT
*bwt
, const char *inverseSaFileName
);
93 void BWTGenerateCachedSaIndex(const BWT
*bwt
, const unsigned int numOfChar
, const char *cachedSaIndexFileName
);
94 void BWTGenerateSaBitmap(const BWT
*bwt
, const unsigned int numOfChar
, const char *saBitmapFileName
);
95 void BWTGenerateCompressedSaBitmap(const BWT
*bwt
, const unsigned int numOfChar
, const char *saBitmapFileName
);
96 void BWTCountPattern(const BWT
*bwt
, const unsigned int numOfChar
);
98 // verification functions
99 void BWTPrintAndVerifyOccValue(const BWT
*bwt
, FILE *output
);
100 void BWTPrintAndVerifySaValue(const BWT
*bwt
, FILE *output
);
101 void BWTPrintAndVerifyInverseSa(const BWT
*bwt
, FILE *output
);