6 #include <stddef.h> //size_t
7 #include <stdint.h> //uint64_t
8 #include <stdio.h> //FILE
10 #include "cfgparser.h"
/* Compile-time constants for the d-left array implementation. */
#define HASH_LENB (128u)
#define SDL_SUBARRAY_UNIT (8u)
//#define SDLA_ITEMARRAY 32u
#define SUBARRAY_SIZE (32u*1024u)
/* Command to query the L1 cache size of the build host
 * (presumably what SUBARRAY_SIZE is tuned against -- TODO confirm): */
//gcc -### -march=native -E /usr/include/stdlib.h 2>&1 | grep l1-cache-size
/* No trailing ';' in the replacement list: a semicolon there is pasted into
 * every use and breaks the macro inside larger expressions such as
 * (x + ENCODEDBIT_ENTROPY_PAD). */
#define ENCODEDBIT_ENTROPY_PAD (1u)
/*
http://math.stackexchange.com/questions/53353/is-there-some-reversible-mapping-that-as-uniform-as-a-hash
Let $H$ be a hash function taking an arbitrary string as input and producing an $n$-bit string as output.
Given an input string $s$, let $s_0$ denote the first $n$ bits of $s$ and $s_1$ the rest
(i.e. $s = s_0\;||\;s_1$, where $s_0$ is $n$ bits long and $||$ denotes concatenation).
Then define $$f(s) = (s_0 \oplus H(s_1))\;||\;s_1,$$ where $\oplus$ means bitwise XOR.
It's easy to see that $f$ is its own inverse, i.e. $f(f(s)) = s$,
so that the input string $s$ can be recovered from $f(s)$ just by running it through $f$ again.

f(s)=(s0 XOR H(s1))||s1, length(s0)=EncodedBit, length(s1)=rBit

EncodedBit < ArrayBit < rBit
*/
31 typedef struct __SDLeftArray_t
{
32 unsigned char CountBit
, rBit
, ArrayBit
, EncodedBit
;
33 unsigned char itemByte
; //, HashCnt;
34 uint16_t SubItemCount
,SubItemByUnit
; //max(SubItemCount) should be 32768
35 size_t ArraySize
,SDLAbyte
;
36 //uint64_t maxCount; == Item_CountBitMask
37 //unsigned char ArrayCount;
38 uint64_t ItemInsideAll
, CellOverflowCount
, CountBitOverflow
; // ItemInsideAll = ItemInsideArray + CellOverflowCount
39 double FalsePositiveRatio
;
41 uint64_t maxCountSeen
;
43 uint64_t outhash
[2]; // both ArrayBit and rBit is (0,64], so HashCnt==1 for MurmurHash3_x64_128
44 uint128_t Item_rBitMask
;
45 uint64_t Hash_ArrayBitMask
, Hash_rBitMask
, Item_CountBitMask
;
/*
 * Fixed-layout header record; dleft_dump() takes a pointer to one together
 * with a FILE *, so this is presumably the on-disk file header -- TODO confirm.
 *
 * The struct is declared packed, so members are laid out back to back with no
 * padding; member order and widths therefore define the byte layout and must
 * not be changed casually.
 */
typedef struct __SDLdumpHead_t {
    char FileID[4];                 //"GDSD"
    unsigned char FileVersion[2];   //0,1

    unsigned char CountBit;
    unsigned char rBit;
    uint16_t SubItemCount;

    uint64_t ArraySize;
    uint64_t SDLAbyte;
    uint64_t extreebyte;

    uint64_t ItemInsideAll;
    uint64_t CellOverflowCount;
    uint64_t CountBitOverflow;

    uint64_t maxCountSeen;
    uint64_t HistMaxCntVal;
    uint64_t HistMaxHistVal;
} __attribute__ ((packed)) SDLdumpHead;
64 SDLeftArray_t
*dleft_arraynew(unsigned char CountBit
, const SDLConfig
* const psdlcfg
);
65 SDLeftArray_t
*dleft_arrayinit(unsigned char CountBit
, size_t ArraySize
, uint16_t SubItemCount
);
66 size_t dleft_insert_read(unsigned int k
, char const *const inseq
, size_t len
, SDLeftArray_t
*dleftobj
);
68 unsigned char GETitemByte_PADrBit_trimSubItemCount(const unsigned char CountBit
, unsigned char *prBit
, uint16_t *pSubItemCount
);
70 void fprintSDLAnfo(FILE *stream
, const SDLeftArray_t
* dleftobj
);
71 void dleft_arraydestroy(SDLeftArray_t
* const dleftobj
);
72 void dleft_dump(const SDLeftArray_t
* const, SDLdumpHead
* const, FILE *);
//#include "sdleftTF.h"
/*
 * Histogram summary; dleft_stat() returns a pointer to one of these.
 *
 * NOTE(review): the closing "} SDLeftStat_t;" was absent here and has been
 * restored from the type name used by the declarations that follow.  Only
 * the members that were visible are listed -- confirm none are missing.
 */
typedef struct __SDLeftStat_t {
    uint64_t HistMaxCntVal;
    uint64_t HistMaxHistVal;
} SDLeftStat_t;
82 typedef SDLeftStat_t
*(G_SDLeftArray_IN
)(SDLeftArray_t
* const, FILE *); //*G_SDLeftArray_IN() is OK,too .
85 SDLeftStat_t
* dleft_stat(SDLeftArray_t
* const dleftobj
, FILE *stream
, FILE *fpdat
);
87 SDLeftStat_t
* dleft_stat(SDLeftArray_t
* const dleftobj
, FILE *stream
);