modified: src1/worker.c
[GalaxyCodeBases.git] / c_cpp / readscorr / sdleft.h
blobec4909f836f6eb1166168c52afec8b5659b53c99
1 // by Hu Xuesong
2 #ifndef _G_SDLEFT_H
3 #define _G_SDLEFT_H
5 #include "gtypendef.h"
6 #include <stddef.h> //size_t
7 #include <stdint.h> //uint64_t
8 #include <stdio.h> //FILE
9 #include "gFileIO.h"
10 #include "cfgparser.h"
/*
 * Compile-time sizes used by the d-left array code.
 * NOTE(review): HASH_LENB is presumably the hash width in bits (128) - confirm
 * against the hashing code.
 */
#define HASH_LENB (128u)
#define SDL_SUBARRAY_UNIT (8u)
//#define SDLA_ITEMARRAY 32u
/*
 * 32 KiB. NOTE(review): presumably sized after the L1 data cache; the command
 * below prints the l1-cache-size GCC detects for the build host - confirm.
 */
#define SUBARRAY_SIZE (32u*1024u)
//gcc -### -march=native -E /usr/include/stdlib.h 2>&1 | grep l1-cache-size
/*
 * Deliberately no trailing ';': with one, "x + ENCODEDBIT_ENTROPY_PAD + y"
 * would be cut into two statements and use as a function argument or inside
 * a condition would not compile.
 */
#define ENCODEDBIT_ENTROPY_PAD (1u)
/*
http://math.stackexchange.com/questions/53353/is-there-some-reversible-mapping-that-as-uniform-as-a-hash
Let $H$ be a hash function taking an arbitrary string as input and producing an $n$-bit string as output.
Given an input string $s$, let $s_0$ denote the first $n$ bits of $s$ and $s_1$ the rest
(i.e. $s = s_0\;||\;s_1$, where $s_0$ is $n$ bits long and $||$ denotes concatenation).
Then define $$f(s) = (s_0 \oplus H(s_1))\;||\;s_1,$$ where $\oplus$ means bitwise XOR.
It's easy to see that $f$ is its own inverse, i.e. $f(f(s)) = s$,
so that the input string $s$ can be recovered from $f(s)$ just by running it through $f$ again.

Here: f(s) = (s0 XOR H(s1)) || s1, with length(s0) = EncodedBit and length(s1) = rBit.

EncodedBit < ArrayBit < rBit
*/
31 typedef struct __SDLeftArray_t {
32 unsigned char CountBit, rBit, ArrayBit, EncodedBit;
33 unsigned char itemByte; //, HashCnt;
34 uint16_t SubItemCount,SubItemByUnit; //max(SubItemCount) should be 32768
35 size_t ArraySize,SDLAbyte;
36 //uint64_t maxCount; == Item_CountBitMask
37 //unsigned char ArrayCount;
38 uint64_t ItemInsideAll, CellOverflowCount, CountBitOverflow; // ItemInsideAll = ItemInsideArray + CellOverflowCount
39 double FalsePositiveRatio;
40 void *pDLA, *pextree;
41 uint64_t maxCountSeen;
42 //uint64_t *outhash;
43 uint64_t outhash[2]; // both ArrayBit and rBit is (0,64], so HashCnt==1 for MurmurHash3_x64_128
44 uint128_t Item_rBitMask;
45 uint64_t Hash_ArrayBitMask, Hash_rBitMask, Item_CountBitMask;
46 } SDLeftArray_t;
/*
 * Header record handed to dleft_dump().
 *
 * The struct is declared packed, so members are laid out back to back with no
 * padding (104 bytes with the member types below).
 * NOTE(review): presumably written verbatim at the start of a dump file, in
 * which case the member order and widths are part of the file format -
 * confirm in dleft_dump() before changing anything here.
 */
typedef struct __SDLdumpHead_t {
    char FileID[4];               // "GDSD"
    unsigned char FileVersion[2]; // 0,1
    uint16_t kmersize;
    unsigned char CountBit, rBit;
    uint16_t SubItemCount;
    uint64_t ArraySize, SDLAbyte, extreebyte;
    uint64_t ItemInsideAll, CellOverflowCount, CountBitOverflow;
    uint64_t maxCountSeen;
    uint64_t HistMaxCntVal;
    uint64_t HistMaxHistVal;
    double HistMean;
    double HistSStd;
    uint32_t crc32c;
} __attribute__ ((packed)) SDLdumpHead;
64 SDLeftArray_t *dleft_arraynew(unsigned char CountBit, const SDLConfig * const psdlcfg);
65 SDLeftArray_t *dleft_arrayinit(unsigned char CountBit, size_t ArraySize, uint16_t SubItemCount);
66 size_t dleft_insert_read(unsigned int k, char const *const inseq, size_t len, SDLeftArray_t *dleftobj);
/*
 * Takes CountBit by value and rBit / SubItemCount by pointer, so the latter
 * two can be adjusted in place.
 * NOTE(review): going by the name, it returns the item size in bytes, pads
 * *prBit and trims *pSubItemCount - confirm against the definition.
 */
unsigned char GETitemByte_PADrBit_trimSubItemCount(const unsigned char CountBit, unsigned char *prBit, uint16_t *pSubItemCount);
70 void fprintSDLAnfo(FILE *stream, const SDLeftArray_t * dleftobj);
71 void dleft_arraydestroy(SDLeftArray_t * const dleftobj);
72 void dleft_dump(const SDLeftArray_t * const, SDLdumpHead * const, FILE *);
74 //#include "sdleftTF.h"
/*
 * Result record returned (by pointer) from dleft_stat(). The same four
 * member names also appear in SDLdumpHead.
 * NOTE(review): going by the names, these summarise a histogram of counts
 * (largest count value, largest histogram bin, mean, sample standard
 * deviation) - confirm against dleft_stat().
 */
typedef struct __SDLeftStat_t {
    uint64_t HistMaxCntVal;
    uint64_t HistMaxHistVal;
    double HistMean;
    double HistSStd;
} SDLeftStat_t;
82 typedef SDLeftStat_t *(G_SDLeftArray_IN)(SDLeftArray_t * const, FILE *); //*G_SDLeftArray_IN() is OK,too .
84 #ifdef TEST
85 SDLeftStat_t * dleft_stat(SDLeftArray_t * const dleftobj, FILE *stream, FILE *fpdat);
86 #else
87 SDLeftStat_t * dleft_stat(SDLeftArray_t * const dleftobj, FILE *stream);
88 #endif
90 #endif /* sdleft.h */