/*

   TextConverter.h        Text Converter

   This module contains miscellaneous text conversion functions.

   Copyright (C) 2004, Wong Chi Kwong.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License
   as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

*/
25 #ifndef __TEXTCONVERTOR_H__
26 #define __TEXTCONVERTOR_H__
#include <stdio.h>

#include "TypeNLimit.h"
#include "MemManager.h"
// Sentinel charMap entry value: the indexed input value is an invalid input.
#define INVALID_CHAR                0xFF
// Number of entries in a character map (the charMap array size).
#define CHAR_MAP_SIZE               256
// Packed buffer capacity expressed in words, then the same capacity in bytes.
// BYTES_IN_WORD is not defined in this header; it is presumably supplied by
// TypeNLimit.h (TODO: confirm).
#define PACKED_BUFFER_SIZE_IN_WORD  65536
#define PACKED_BUFFER_SIZE          (PACKED_BUFFER_SIZE_IN_WORD * BYTES_IN_WORD)
// Presumably the maximum length of a sequence name (usage not visible here).
#define MAX_SEQ_NAME_LENGTH         256
// Character constant 'R'; presumably marks a randomly substituted character
// (usage not visible here).
#define RANDOM_SUBSTITUTE           'R'
// charMap is an unsigned char array of size CHAR_MAP_SIZE (256). The index of the array is the
// input text value and the content of the array is the output text value, e.g. A -> 0, C -> 1.
// If the value of an entry equals INVALID_CHAR, the indexed text value is an invalid input.
// Retrieve word packed text
//
// Declaration only; the definition is not part of this header.
//   packedText  - read-only unsigned int array, presumably the word-packed text
//   index       - presumably the position to read from (unit not visible here)
//   shift, numberOfBit, vacantBit
//               - by-value bit parameters; their exact roles are not visible
//                 here (TODO: confirm against the implementation)
// Returns an unsigned long long, presumably the retrieved packed bits.
unsigned long long GetWordPackedText(const unsigned int *packedText,
                                     const unsigned long long index,
                                     const unsigned long long shift,
                                     const unsigned long long numberOfBit,
                                     const unsigned long long vacantBit);
// Character map functions

// ReadCharMap - declaration only; the definition is not part of this header.
//   charMap        - writable unsigned char array, presumably filled with the
//                    mapping read from the file
//   inputFileName  - name of the file to read
//   defaultMapping - presumably the value used for entries the file does not
//                    map (TODO: confirm)
// Returns an unsigned long long; its meaning is not visible from this header.
unsigned long long ReadCharMap(unsigned char *charMap,
                               const char *inputFileName,
                               const unsigned char defaultMapping);
// GenerateReverseCharMap - declaration only.
//   charMap        - read-only forward character map
//   reverseCharMap - writable output array; presumably receives the inverse
//                    mapping (output value -> input value). Required size is
//                    not visible here (TODO: confirm, likely CHAR_MAP_SIZE).
void GenerateReverseCharMap(const unsigned char *charMap,
                            unsigned char *reverseCharMap);
// Word packed text functions

// BitPerWordPackedChar - declaration only.
//   alphabetSize - number of distinct symbols
// Returns an unsigned int, presumably the number of bits used per character in
// the word-packed representation (TODO: confirm rounding rule).
unsigned int BitPerWordPackedChar(const unsigned int alphabetSize);
// TextLengthFromWordPacked - declaration only.
//   wordPackedLength - presumably the packed length counted in words
//   bitPerChar       - bits used per packed character
//   lastWordLength   - presumably the number of characters held in the final
//                      word (TODO: confirm)
// Returns an unsigned long long, presumably the text length in characters.
unsigned long long TextLengthFromWordPacked(unsigned long long wordPackedLength,
                                            unsigned int bitPerChar,
                                            unsigned int lastWordLength);
// WordPackedLengthFromText - declaration only.
//   textLength - text length, presumably in characters
//   bitPerChar - bits used per packed character
// Returns an unsigned long long, presumably the number of words needed to hold
// the packed text (TODO: confirm).
unsigned long long WordPackedLengthFromText(unsigned long long textLength,
                                            unsigned int bitPerChar);
// LastWordLength - declaration only.
//   textLength - text length, presumably in characters
//   bitPerChar - bits used per packed character
// Returns an unsigned int, presumably the number of characters stored in the
// final word of the packed text (TODO: confirm).
unsigned int LastWordLength(unsigned long long textLength,
                            unsigned int bitPerChar);
// Byte packed text functions

// BitPerBytePackedChar - declaration only.
//   alphabetSize - number of distinct symbols
// Returns an unsigned int, presumably the number of bits used per character in
// the byte-packed representation (TODO: confirm rounding rule).
unsigned int BitPerBytePackedChar(const unsigned int alphabetSize);
// TextLengthFromBytePacked - declaration only.
//   bytePackedLength - presumably the packed length counted in bytes
//   bitPerChar       - bits used per packed character
//   lastByteLength   - presumably the number of characters held in the final
//                      byte (TODO: confirm)
// Returns an unsigned long long, presumably the text length in characters.
unsigned long long TextLengthFromBytePacked(unsigned long long bytePackedLength,
                                            unsigned int bitPerChar,
                                            unsigned int lastByteLength);
// BytePackedLengthFromText - declaration only.
//   textLength - text length, presumably in characters
//   bitPerChar - bits used per packed character
// Returns an unsigned long long, presumably the number of bytes needed to hold
// the packed text (TODO: confirm).
unsigned long long BytePackedLengthFromText(unsigned long long textLength,
                                            unsigned int bitPerChar);
// LastByteLength - declaration only.
//   textLength - text length, presumably in characters
//   bitPerChar - bits used per packed character
// Returns an unsigned char (note: LastWordLength returns unsigned int),
// presumably the number of characters stored in the final byte (TODO: confirm).
unsigned char LastByteLength(unsigned long long textLength,
                             unsigned int bitPerChar);
// Conversion functions

// ConvertTextToWordPacked - declaration only.
//   input        - read-only source text (unsigned char per character)
//   output       - writable unsigned int array receiving the result; required
//                  capacity is not visible here (TODO: confirm)
//   charMap      - read-only character map applied to the input
//   alphabetSize - number of distinct symbols
//   textLength   - number of input characters
void ConvertTextToWordPacked(const unsigned char *input,
                             unsigned int *output,
                             const unsigned char *charMap,
                             const unsigned int alphabetSize,
                             const unsigned long long textLength);
// ConvertTextToBytePacked - declaration only.
//   input        - read-only source text (unsigned char per character)
//   output       - writable unsigned char array receiving the result; required
//                  capacity is not visible here (TODO: confirm)
//   charMap      - read-only character map applied to the input
//   alphabetSize - number of distinct symbols
//   textLength   - number of input characters
void ConvertTextToBytePacked(const unsigned char *input,
                             unsigned char *output,
                             const unsigned char *charMap,
                             const unsigned int alphabetSize,
                             const unsigned long long textLength);
// ConvertWordPackedToText - declaration only.
//   input          - read-only unsigned int array, presumably word-packed text
//   output         - writable unsigned char array receiving the text
//   reverseCharMap - read-only reverse character map (code -> text value)
//   alphabetSize   - number of distinct symbols
//   textLength     - presumably the number of characters to produce
void ConvertWordPackedToText(const unsigned int *input,
                             unsigned char *output,
                             const unsigned char *reverseCharMap,
                             const unsigned int alphabetSize,
                             const unsigned long long textLength);
// ConvertBytePackedToText - declaration only.
//   input          - read-only unsigned char array, presumably byte-packed text
//   output         - writable unsigned char array receiving the text
//   reverseCharMap - read-only reverse character map (code -> text value)
//   alphabetSize   - number of distinct symbols
//   textLength     - presumably the number of characters to produce
void ConvertBytePackedToText(const unsigned char *input,
                             unsigned char *output,
                             const unsigned char *reverseCharMap,
                             const unsigned int alphabetSize,
                             const unsigned long long textLength);
// ConvertBytePackedToCode - declaration only.
//   input        - read-only unsigned char array, presumably byte-packed text
//   output       - writable unsigned char array, presumably one code per entry
//   alphabetSize - number of distinct symbols
//   textLength   - presumably the number of characters to produce
// No character map is taken, so the output is presumably the raw codes rather
// than text values (TODO: confirm).
void ConvertBytePackedToCode(const unsigned char *input,
                             unsigned char *output,
                             const unsigned int alphabetSize,
                             const unsigned long long textLength);
// ConvertWordPackedToBytePacked - declaration only.
//   input        - read-only unsigned int array, presumably word-packed text
//   output       - writable unsigned char array, presumably byte-packed result
//   alphabetSize - number of distinct symbols
//   textLength   - presumably the number of characters represented
void ConvertWordPackedToBytePacked(const unsigned int *input,
                                   unsigned char *output,
                                   const unsigned int alphabetSize,
                                   const unsigned long long textLength);
// ConvertBytePackedToWordPacked - declaration only.
//   input        - read-only unsigned char array, presumably byte-packed text
//   output       - writable unsigned int array, presumably word-packed result
//   alphabetSize - number of distinct symbols
//   textLength   - presumably the number of characters represented
void ConvertBytePackedToWordPacked(const unsigned char *input,
                                   unsigned int *output,
                                   const unsigned int alphabetSize,
                                   const unsigned long long textLength);
// ConvertTextToCode - declaration only.
//   input      - read-only source text (unsigned char per character)
//   output     - writable unsigned char array, presumably one code per character
//   charMap    - read-only character map (text value -> code)
//   textLength - number of input characters
void ConvertTextToCode(const unsigned char *input,
                       unsigned char *output,
                       const unsigned char *charMap,
                       const unsigned long long textLength);
// ConvertCodeToText - declaration only.
//   input          - read-only unsigned char array, presumably one code per entry
//   output         - writable unsigned char array receiving the text
//   reverseCharMap - read-only reverse character map (code -> text value)
//   textLength     - number of entries to convert
void ConvertCodeToText(const unsigned char *input,
                       unsigned char *output,
                       const unsigned char *reverseCharMap,
                       const unsigned long long textLength);
// Pack text with all shift
//
// Declaration only; the definition is not part of this header.
//   input        - read-only source text (unsigned char per character)
//   output       - array of writable unsigned int buffers; presumably one
//                  packed copy per possible shift. How many buffers are
//                  expected and who allocates them is not visible here
//                  (TODO: confirm against the implementation).
//   charMap      - read-only character map applied to the input
//   alphabetSize - number of distinct symbols
//   textLength   - number of input characters
void PackTextWithAllShift(const unsigned char *input,
                          unsigned int **output,
                          const unsigned char *charMap,
                          const unsigned int alphabetSize,
                          const unsigned long long textLength);
// ReadTextAsWordPacked - declaration only.
//   inputFileName - name of the file to read
//   charMap       - read-only character map applied to the file contents
//   alphabetSize  - number of distinct symbols
//   targetAddress - writable unsigned int buffer receiving the packed text
//   maxTextLength - presumably the largest text length the buffer can hold
// Returns an unsigned long long, presumably the text length actually read
// (TODO: confirm).
unsigned long long ReadTextAsWordPacked(const char *inputFileName,
                                        const unsigned char *charMap,
                                        const unsigned int alphabetSize,
                                        unsigned int *targetAddress,
                                        const unsigned long long maxTextLength);
// ReadBytePackedAsWordPacked - declaration only.
//   inputFileName - name of the file to read, presumably holding byte-packed text
//   alphabetSize  - number of distinct symbols
//   targetAddress - writable unsigned int buffer receiving the packed text
//   maxTextLength - presumably the largest text length the buffer can hold
// Returns an unsigned long long, presumably the text length actually read
// (TODO: confirm).
unsigned long long ReadBytePackedAsWordPacked(const char *inputFileName,
                                              const unsigned int alphabetSize,
                                              unsigned int *targetAddress,
                                              const unsigned long long maxTextLength);
// DNALoadPacked - declaration only.
//   inputFileName        - name of the file to read
//   textLength           - out parameter; presumably receives the text length
//   convertToWordPacked  - flag; presumably non-zero requests word-packed output
//   trailerBufferInWord  - presumably extra words reserved after the data
// Returns an untyped pointer to the loaded data. Ownership is not visible here;
// presumably the result is released with DNAFreePacked using the same
// textLength and trailerBufferInWord (TODO: confirm).
void *DNALoadPacked(const char *inputFileName,
                    unsigned long long *textLength,
                    const unsigned int convertToWordPacked,
                    const unsigned int trailerBufferInWord);
// DNAFreePacked - declaration only.
//   packedDna           - pointer to release; presumably one returned by
//                         DNALoadPacked (TODO: confirm)
//   textLength          - text length associated with the buffer
//   trailerBufferInWord - presumably the trailer size given when loading
void DNAFreePacked(void *packedDna,
                   const unsigned long long textLength,
                   const unsigned int trailerBufferInWord);
// SaveText - declaration only.
//   outputFileName - name of the file to write
//   text           - read-only text buffer (unsigned char per character)
//   textLength     - number of characters in text
// Returns nothing; how write failures are reported is not visible here.
void SaveText(const char *outputFileName,
              const unsigned char *text,
              const unsigned long long textLength);
// SaveBytePacked - declaration only.
//   outputFileName - name of the file to write
//   bytePacked     - read-only unsigned char buffer, presumably byte-packed
//                    text. Prototype parameter names are documentation only,
//                    so this name need not match the one in the definition.
//   textLength     - presumably the number of characters represented
//   alphabetSize   - number of distinct symbols
// Returns nothing; how write failures are reported is not visible here.
void SaveBytePacked(const char *outputFileName,
                    const unsigned char *bytePacked,
                    const unsigned long long textLength,
                    const unsigned int alphabetSize);
// SaveWordPacked - declaration only.
//   outputFileName - name of the file to write
//   wordPacked     - read-only unsigned int buffer, presumably word-packed text
//   textLength     - presumably the number of characters represented
//   alphabetSize   - number of distinct symbols
// Returns nothing; how write failures are reported is not visible here.
void SaveWordPacked(const char *outputFileName,
                    const unsigned int *wordPacked,
                    const unsigned long long textLength,
                    const unsigned int alphabetSize);
// Incremental load functions (start from end of text)

// InitialLoadPackedIncFromEnd - declaration only. FILE comes from <stdio.h>,
// which must be visible before this declaration.
//   inputFileName         - name of the packed file to open
//   packedOutput          - writable unsigned char buffer receiving packed data
//   alphabetSize          - number of distinct symbols
//   packedLengthPerLoad   - presumably the packed length fetched per load
//   textLength            - out parameter; presumably the total text length
//   textLengthForThisLoad - out parameter; presumably the text length covered
//                           by this first load
// Returns a FILE pointer, presumably the open stream to pass on to
// LoadPackedIncFromEnd; who closes it is not visible here (TODO: confirm).
FILE *InitialLoadPackedIncFromEnd(const char *inputFileName,
                                  unsigned char *packedOutput,
                                  const unsigned int alphabetSize,
                                  const unsigned long long packedLengthPerLoad,
                                  unsigned long long *textLength,
                                  unsigned long long *textLengthForThisLoad);
// LoadPackedIncFromEnd - declaration only. FILE comes from <stdio.h>.
//   packedFile          - open stream, presumably the one returned by
//                         InitialLoadPackedIncFromEnd (TODO: confirm)
//   packedOutput        - writable unsigned char buffer receiving packed data
//   packedLengthPerLoad - presumably the packed length to fetch in this load
void LoadPackedIncFromEnd(FILE *packedFile,
                          unsigned char *packedOutput,
                          const unsigned long long packedLengthPerLoad);
// InitialLoadTextIncFromEnd - declaration only. FILE comes from <stdio.h>,
// which must be visible before this declaration.
//   inputFileName         - name of the text file to open
//   textOutput            - writable unsigned char buffer receiving text
//   textLengthPerLoad     - presumably the text length fetched per load
//   textLength            - out parameter; presumably the total text length
//   textLengthForThisLoad - out parameter; presumably the text length covered
//                           by this first load
// Returns a FILE pointer, presumably the open stream to pass on to
// LoadTextIncFromEnd; who closes it is not visible here (TODO: confirm).
FILE *InitialLoadTextIncFromEnd(const char *inputFileName,
                                unsigned char *textOutput,
                                const unsigned long long textLengthPerLoad,
                                unsigned long long *textLength,
                                unsigned long long *textLengthForThisLoad);
// LoadTextIncFromEnd - declaration only. FILE comes from <stdio.h>.
//   textFile          - open stream, presumably the one returned by
//                       InitialLoadTextIncFromEnd (TODO: confirm)
//   textOutput        - writable unsigned char buffer receiving text
//   textLengthPerLoad - presumably the text length to fetch in this load
void LoadTextIncFromEnd(FILE *textFile,
                        unsigned char *textOutput,
                        const unsigned long long textLengthPerLoad);