modified: src1/input.c
[GalaxyCodeBases.git] / BGI / BASE / src / 2bwt / TextConverter.h
blobcb7af75764e661c9aaac58b29366e56c02e57bd6
1 /*
3 TextConverter.h Text Converter
5 This module contains miscellaneous text conversion functions.
7 Copyright (C) 2004, Wong Chi Kwong.
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License
11 as published by the Free Software Foundation; either version 2
12 of the License, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
25 #ifndef __TEXTCONVERTOR_H__
26 #define __TEXTCONVERTOR_H__
28 #include "TypeNLimit.h"
29 #include "MemManager.h"
// Value that marks a charMap entry as an invalid input (see the charMap description in this header).
31 #define INVALID_CHAR 0xFF
// NOTE(review): presumably the number of entries in a character map; this header describes
// charMap as "a char array of size 256" — confirm that arrays are sized with this macro.
32 #define CHAR_MAP_SIZE 256
// Packed buffer size computed as word count times BYTES_IN_WORD.
// BYTES_IN_WORD is not defined in this header — presumably it comes from TypeNLimit.h; confirm.
// Using PACKED_BUFFER_SIZE_IN_WORD before its #define is fine: macros expand at the point of use.
33 #define PACKED_BUFFER_SIZE (PACKED_BUFFER_SIZE_IN_WORD * BYTES_IN_WORD)
// Packed buffer size expressed in words.
34 #define PACKED_BUFFER_SIZE_IN_WORD 65536
// NOTE(review): presumably the maximum length of a sequence name; whether this includes a
// terminating NUL cannot be told from this header — confirm against the users of the macro.
35 #define MAX_SEQ_NAME_LENGTH 256
// NOTE(review): presumably a marker character requesting random substitution; its use is not shown here.
36 #define RANDOM_SUBSTITUTE 'R'
38 // charMap is a char array of size 256. The index of the array is the input text value
39 // and the content of the array is the output text value. e.g. A -> 0, C -> 1
40 // If the value of an entry = INVALID_CHAR, the indexed text value is an invalid input
42 // Retrieve word packed text
// Prototype only; the definition is not part of this header.
// packedText is a read-only array of unsigned int; the result is returned as unsigned long long.
// NOTE(review): index, shift, numberOfBit and vacantBit presumably select a bit field inside
// the packed words — their exact meaning and units must be confirmed in the implementation.
43 unsigned long long GetWordPackedText(const unsigned int *packedText, const unsigned long long index, const unsigned long long shift, const unsigned long long numberOfBit, const unsigned long long vacantBit);
45 // Character map functions
// ReadCharMap takes a writable charMap, the name of an input file and a default mapping value,
// and returns an unsigned long long.
// NOTE(review): presumably charMap is filled from the file, unlisted characters receive
// defaultMapping, and the return value is the alphabet size — confirm in the implementation.
46 unsigned long long ReadCharMap(unsigned char *charMap, const char *inputFileName, const unsigned char defaultMapping);
// GenerateReverseCharMap reads charMap (const) and writes reverseCharMap.
// NOTE(review): both arrays presumably hold CHAR_MAP_SIZE entries — confirm.
47 void GenerateReverseCharMap(const unsigned char *charMap, unsigned char *reverseCharMap);
49 // Word packed text functions
// Size helpers for word-packed text. All four take and return plain integers; none takes a pointer.
// NOTE(review): judging by the names, they convert between alphabet size, bits per character,
// text length in characters and packed length in words. Whether lastWordLength counts
// characters or bits in the final word is not visible here — confirm in the implementation.
50 unsigned int BitPerWordPackedChar(const unsigned int alphabetSize);
51 unsigned long long TextLengthFromWordPacked(unsigned long long wordPackedLength, unsigned int bitPerChar, unsigned int lastWordLength);
52 unsigned long long WordPackedLengthFromText(unsigned long long textLength, unsigned int bitPerChar);
53 unsigned int LastWordLength(unsigned long long textLength, unsigned int bitPerChar);
55 // Byte packed text functions
// Size helpers for byte-packed text, parallel in shape to the word-packed helpers in this header.
// NOTE(review): LastByteLength returns unsigned char, while TextLengthFromBytePacked accepts
// lastByteLength as unsigned int (and LastWordLength returns unsigned int) — the widening is
// harmless, but confirm the type difference is intentional.
56 unsigned int BitPerBytePackedChar(const unsigned int alphabetSize);
57 unsigned long long TextLengthFromBytePacked(unsigned long long bytePackedLength, unsigned int bitPerChar, unsigned int lastByteLength);
58 unsigned long long BytePackedLengthFromText(unsigned long long textLength, unsigned int bitPerChar);
59 unsigned char LastByteLength(unsigned long long textLength, unsigned int bitPerChar);
61 // Conversion functions
// Every function here returns void, reads from a const input pointer and writes through a
// non-const output pointer, so the destination storage is supplied by the caller.
// textLength is passed to every function; the required output capacity is not stated in this
// header. NOTE(review): presumably it follows from the *LengthFromText helpers — confirm.
// Functions converting from text take charMap; functions converting to text take reverseCharMap.
// NOTE(review): "Code" in the names presumably means one mapped value per output byte — confirm.
62 void ConvertTextToWordPacked(const unsigned char *input, unsigned int *output, const unsigned char *charMap, const unsigned int alphabetSize, const unsigned long long textLength);
63 void ConvertTextToBytePacked(const unsigned char *input, unsigned char *output, const unsigned char *charMap, const unsigned int alphabetSize, const unsigned long long textLength);
64 void ConvertWordPackedToText(const unsigned int *input, unsigned char *output, const unsigned char *reverseCharMap, const unsigned int alphabetSize, const unsigned long long textLength);
65 void ConvertBytePackedToText(const unsigned char *input, unsigned char *output, const unsigned char *reverseCharMap, const unsigned int alphabetSize, const unsigned long long textLength);
66 void ConvertBytePackedToCode(const unsigned char *input, unsigned char *output, const unsigned int alphabetSize, const unsigned long long textLength);
67 void ConvertWordPackedToBytePacked(const unsigned int *input, unsigned char *output, const unsigned int alphabetSize, const unsigned long long textLength);
68 void ConvertBytePackedToWordPacked(const unsigned char *input, unsigned int *output, const unsigned int alphabetSize, const unsigned long long textLength);
// The two Code/Text conversions take no alphabetSize, unlike the packed conversions above.
69 void ConvertTextToCode(const unsigned char *input, unsigned char *output, const unsigned char *charMap, const unsigned long long textLength);
70 void ConvertCodeToText(const unsigned char *input, unsigned char *output, const unsigned char *reverseCharMap, const unsigned long long textLength);
72 // Pack text with all shift
// output is a pointer to pointer (unsigned int **), unlike the single-pointer outputs of the
// conversion functions in this header.
// NOTE(review): presumably output is an array of word-packed buffers, one per shift; who
// allocates them and how many are expected is not visible here — confirm in the implementation.
73 void PackTextWithAllShift(const unsigned char *input, unsigned int **output, const unsigned char *charMap, const unsigned int alphabetSize, const unsigned long long textLength);
75 // Full load function
// The two Read* functions take an input file name, a caller-supplied targetAddress and a
// maxTextLength, and return an unsigned long long.
// NOTE(review): the return value is presumably the text length read — confirm.
76 unsigned long long ReadTextAsWordPacked(const char *inputFileName, const unsigned char *charMap, const unsigned int alphabetSize, unsigned int *targetAddress, const unsigned long long maxTextLength);
77 unsigned long long ReadBytePackedAsWordPacked(const char *inputFileName, const unsigned int alphabetSize, unsigned int *targetAddress, const unsigned long long maxTextLength);
// DNALoadPacked returns an untyped pointer and reports a length through textLength.
// NOTE(review): the pointee type presumably depends on convertToWordPacked, and the result is
// presumably released with DNAFreePacked using the same textLength and trailerBufferInWord
// — confirm ownership in the implementation.
78 void *DNALoadPacked(const char *inputFileName, unsigned long long *textLength, const unsigned int convertToWordPacked, const unsigned int trailerBufferInWord);
79 void DNAFreePacked(void* packedDna, const unsigned long long textLength, const unsigned int trailerBufferInWord);
81 // Save functions
// Each function takes an output file name, a read-only buffer and a textLength, and returns
// void, so failures are not reported through the return value.
// NOTE(review): how I/O errors are handled is not visible in this header — confirm.
82 void SaveText(const char *outputFileName, const unsigned char *text, const unsigned long long textLength);
// NOTE(review): the buffer parameter is named wordPacked although its type is
// const unsigned char * and the function name says BytePacked — likely a copy-paste name.
83 void SaveBytePacked(const char *outputFileName, const unsigned char *wordPacked, const unsigned long long textLength, const unsigned int alphabetSize);
84 void SaveWordPacked(const char *outputFileName, const unsigned int *wordPacked, const unsigned long long textLength, const unsigned int alphabetSize);
86 // Incremental load functions (start from end of text)
// NOTE(review): these prototypes use FILE, which is declared by <stdio.h>; this header
// includes only TypeNLimit.h and MemManager.h, so one of them presumably pulls in <stdio.h>
// — confirm, otherwise the header is not self-contained.
// The InitialLoad* functions take a file name, return a FILE * and write two lengths through
// textLength and textLengthForThisLoad. The Load* functions take that FILE * plus a per-load length.
// NOTE(review): closing the returned FILE * is presumably the caller's responsibility — confirm.
87 FILE *InitialLoadPackedIncFromEnd(const char* inputFileName, unsigned char *packedOutput, const unsigned int alphabetSize, const unsigned long long packedLengthPerLoad, unsigned long long *textLength, unsigned long long *textLengthForThisLoad);
88 void LoadPackedIncFromEnd(FILE *packedFile, unsigned char *packedOutput, const unsigned long long packedLengthPerLoad);
89 FILE *InitialLoadTextIncFromEnd(const char* inputFileName, unsigned char *textOutput, const unsigned long long textLengthPerLoad, unsigned long long *textLength, unsigned long long *textLengthForThisLoad);
90 void LoadTextIncFromEnd(FILE *textFile, unsigned char *textOutput, const unsigned long long textLengthPerLoad);
93 #endif