1 #include <stdlib.h> //calloc
2 #include <stdint.h> // uint_fast8_t
3 #include <math.h> //log2, ceil
4 #include <stdio.h> //fprintf, fseek
6 #include <string.h> //strcmp, strncpy
8 #include <endian.h> //BYTE_ORDER, LITTLE_ENDIAN 1234
15 #include "MurmurHash3.h"
19 #define HASHSEED (0x3ab2ae35)
21 unsigned char GETitemByte_PADrBit_trimSubItemCount(unsigned char CountBit
, unsigned char *prBit
, uint16_t *pSubItemCount
){
22 unsigned char itemByte
= (CountBit
+*prBit
+7u) >> 3; // 2^3=8
23 *prBit
= (itemByte
<<3) - CountBit
;
24 #ifndef TEST /* SubItemCount is trimed on rBit only */
25 double maxSubItemByR
= floor(pow(2.0,(double)*prBit
));
26 if ( *pSubItemCount
> maxSubItemByR
) *pSubItemCount
= (uint16_t)maxSubItemByR
; // safe since *pSubItemCount was bigger
32 SDLeftArray_t
*dleft_arraynew(const unsigned char CountBit
, const SDLConfig
* const psdlcfg
, int kmer
){
35 uint16_t SubItemCount
;
37 return dleft_arrayinit(CountBit
, ArraySize
, SubItemCount
, kmer
);
41 SDLeftArray_t
*dleft_arrayinit(unsigned char CountBit
, size_t ArraySize
, uint16_t SubItemCount
, int kmer
) {
42 if (ArraySize
<2u || ArraySize
>(1LLU<<63) || CountBit
<3u || CountBit
>8u*sizeof(uint64_t) || SubItemCount
<1u ) {
43 err(EXIT_FAILURE
, "[x]Wrong D Left Array Parameters:(%d)[%u]x%zd ",CountBit
,SubItemCount
,ArraySize
);
44 } // CountBit+rBit >= 9, makes uint16_t always OK
45 unsigned char ArrayBit
= ceil(log2(ArraySize
));
46 unsigned char rBit
= ArrayBit
+ ENCODEDBIT_ENTROPY_PAD
;
47 int toEncode
= kmer
*2 - rBit
;
48 if (toEncode
> ArrayBit
) {
50 rBit
= kmer
*2 - ArrayBit
;
52 unsigned char EncodedBit
=(toEncode
>0)?toEncode
:0;
54 #ifdef TEST /* Test mode, keep rBit, pad CountBit */
55 unsigned char itemByte
= GETitemByte_PADrBit_trimSubItemCount(rBit
,&CountBit
,&SubItemCount
);
56 #else /* Normal, keep CountBit, pad rBit */
57 unsigned char itemByte
= GETitemByte_PADrBit_trimSubItemCount(CountBit
,&rBit
,&SubItemCount
);
59 SDLeftArray_t
*dleftobj
= calloc(1,sizeof(SDLeftArray_t
)); // set other int to 0
60 dleftobj
->SDLAbyte
= (SubItemCount
*itemByte
*ArraySize
+127u)&(~(size_t)127u); // We are reading in uint128_t now.
61 dleftobj
->pDLA
= calloc(1,dleftobj
->SDLAbyte
);
62 int mlock_r
= mlock(dleftobj
->pDLA
,dleftobj
->SDLAbyte
);
63 if (mlock_r
) warn("[!]Cannot lock SDL array in memory. Performance maybe reduced.");
64 dleftobj
->CountBit
= CountBit
;
65 dleftobj
->rBit
= rBit
;
66 dleftobj
->ArrayBit
= ArrayBit
;
67 dleftobj
->EncodedBit
= EncodedBit
;
68 dleftobj
->itemByte
= itemByte
;
69 dleftobj
->ArraySize
= ArraySize
;
70 dleftobj
->SubItemCount
= SubItemCount
;
71 dleftobj
->SubItemByUnit
= SubItemCount
/SDL_SUBARRAY_UNIT
;
72 dleftobj
->maxCountSeen
= 1; //if SDLA is not empty, maxCountSeen>=1.
73 dleftobj
->FalsePositiveRatio
= exp2(-rBit
)/(double)ArraySize
;
74 //dleftobj->ItemInsideAll=0;
75 //dleftobj->CellOverflowCount=0;
76 dleftobj
->Item_rBitMask
=(uint128_t
)((1LLU<<rBit
)-1u) << CountBit
;
77 dleftobj
->Item_CountBitMask
=(1LLU<<CountBit
)-1u; // == maxCount
78 dleftobj
->Hash_ArrayBitMask
=(1LLU<<ArrayBit
)-1u;
79 dleftobj
->Hash_rBitMask
=(1LLU<<rBit
)-1u;
83 void fprintSDLAnfo(FILE *stream
, const SDLeftArray_t
* dleftobj
){
84 fprintf(stream
,"SDLA(%#zx) -> {\n\
85 Size:[r:%uB+cnt:%uB]*Item:%zd(%.3f~%uB[enc:%u])*subArray:%u = %g MiB\n\
86 Hash:%u*%uB ItemByte:%u MaxCountSeen:%lu%s\n\
87 Designed Capacity:%lu ItemCount:%lu, with Overflow:%lu\n\
88 FP:%g, estimated FP item count:%.10g\n\
92 dleftobj
->rBit
,dleftobj
->CountBit
,dleftobj
->ArraySize
,log2(dleftobj
->ArraySize
),
93 dleftobj
->ArrayBit
,dleftobj
->EncodedBit
,dleftobj
->SubItemCount
,(double)dleftobj
->SubItemCount
*dleftobj
->itemByte
*dleftobj
->ArraySize
/1048576,
94 1,HASH_LENB
,dleftobj
->itemByte
,dleftobj
->maxCountSeen
,(dleftobj
->ItemInsideAll
)?"":"(=0, as SDLA is empty)",
95 dleftobj
->ArraySize
*(dleftobj
->SubItemCount
*3/4),
96 dleftobj
->ItemInsideAll
,dleftobj
->CellOverflowCount
,
97 dleftobj
->FalsePositiveRatio
,dleftobj
->ItemInsideAll
*dleftobj
->FalsePositiveRatio
,
100 fprintf(stream," Item_rBitMask:[%016lX %016lX]\n", (uint64_t)(dleftobj->Item_rBitMask>>64), (uint64_t)dleftobj->Item_rBitMask);
101 fprintf(stream," Item_CountBitMask:[%016lX]\n", dleftobj->Item_CountBitMask);
102 uint128_t check = dleftobj->Item_rBitMask + dleftobj->Item_CountBitMask;
103 fprintf(stream," Sum:[%016lX %016lX]\n", (uint64_t)(check>>64), (uint64_t)check);
104 fprintf(stream," Hash_ArrayBitMask:[%016lX]\n", dleftobj->Hash_ArrayBitMask);
105 fprintf(stream," Hash_rBitMask:[%016lX]\n", dleftobj->Hash_rBitMask);
107 fputs("}\n", stream
);