1 #include <stdlib.h> //calloc
2 #include <stdint.h> // uint_fast8_t
3 #include <math.h> //log2, ceil
4 #include <stdio.h> //fprintf, fseek
6 #include <string.h> //strcmp, strncpy
8 #include <endian.h> //BYTE_ORDER, LITTLE_ENDIAN 1234
9 //#include <asm/byteorder.h> // __LITTLE_ENDIAN_BITFIELD or __BIG_ENDIAN_BITFIELD
14 //#include "sdleftTF.h"
15 #include "MurmurHash3.h"
19 #define HASHSEED (0x3ab2ae35)
21 #if BYTE_ORDER != LITTLE_ENDIAN
22 #error We focus on Little Endian now.
25 unsigned char GETitemByte_PADrBit_trimSubItemCount(unsigned char CountBit
, unsigned char *prBit
, uint16_t *pSubItemCount
){
26 unsigned char itemByte
= (CountBit
+*prBit
+7u) >> 3; // 2^3=8
27 *prBit
= (itemByte
<<3) - CountBit
;
28 #ifndef TEST /* SubItemCount is trimed on rBit only */
29 double maxSubItemByR
= floor(pow(2.0,(double)*prBit
));
30 if ( *pSubItemCount
> maxSubItemByR
) *pSubItemCount
= (uint16_t)maxSubItemByR
; // safe since *pSubItemCount was bigger
36 SDLeftArray_t
*dleft_arraynew(const unsigned char CountBit
, const SDLConfig
* const psdlcfg
){
39 uint16_t SubItemCount
;
41 return dleft_arrayinit(CountBit
, ArraySize
, SubItemCount
);
45 SDLeftArray_t
*dleft_arrayinit(unsigned char CountBit
, size_t ArraySize
, uint16_t SubItemCount
) {
46 if (ArraySize
<2u || ArraySize
>(1LLU<<63) || CountBit
<3u || CountBit
>8u*sizeof(uint64_t) || SubItemCount
<1u ) {
47 err(EXIT_FAILURE
, "[x]Wrong D Left Array Parameters:(%d)[%u]x%zd ",CountBit
,SubItemCount
,ArraySize
);
48 } // CountBit+rBit >= 9, makes uint16_t always OK
49 unsigned char ArrayBit
= ceil(log2(ArraySize
));
50 unsigned char rBit
= ArrayBit
+ ENCODEDBIT_ENTROPY_PAD
;
51 /* if (ArraySize<2u || CountBit<3u || rBit<6u || rBit>8u*sizeof(uint64_t) || CountBit>8u*sizeof(uint64_t) || SubItemCount<1u ) {
52 err(EXIT_FAILURE, "[x]Wrong D Left Array Parameters:(%d+%d)[%u]x%zd ",rBit,CountBit,SubItemCount,ArraySize);
53 } // CountBit+rBit >= 9, makes uint16_t always OK
55 #ifdef TEST /* Test mode, keep rBit, pad CountBit */
56 unsigned char itemByte
= GETitemByte_PADrBit_trimSubItemCount(rBit
,&CountBit
,&SubItemCount
);
57 #else /* Normal, keep CountBit, pad rBit */
58 unsigned char itemByte
= GETitemByte_PADrBit_trimSubItemCount(CountBit
,&rBit
,&SubItemCount
);
60 SDLeftArray_t
*dleftobj
= calloc(1,sizeof(SDLeftArray_t
)); // set other int to 0
61 dleftobj
->SDLAbyte
= (SubItemCount
*itemByte
*ArraySize
+127u)&(~(size_t)127u); // We are reading in uint128_t now.
62 dleftobj
->pDLA
= calloc(1,dleftobj
->SDLAbyte
);
63 int mlock_r
= mlock(dleftobj
->pDLA
,dleftobj
->SDLAbyte
);
64 if (mlock_r
) warn("[!]Cannot lock SDL array in memory. Performance maybe reduced.");
65 //unsigned char SDLArray[ArraySize][SubItemCount][itemByte];
66 //the GNU C Compiler allocates memory for VLAs on the stack, >_<
67 dleftobj
->CountBit
= CountBit
;
68 dleftobj
->rBit
= rBit
;
69 dleftobj
->ArrayBit
= ArrayBit
;
70 dleftobj
->itemByte
= itemByte
;
71 dleftobj
->ArraySize
= ArraySize
;
72 dleftobj
->SubItemCount
= SubItemCount
;
73 dleftobj
->SubItemByUnit
= SubItemCount
/SDL_SUBARRAY_UNIT
;
74 dleftobj
->maxCountSeen
= 1; //if SDLA is not empty, maxCountSeen>=1.
75 /* only one 128bit hash needed.
76 dleftobj->ArrayCount = ArrayCount;
77 dleftobj->HashCnt = (rBit+ArrayBit+(HASH_LENB-1))/HASH_LENB;
78 dleftobj->outhash = malloc(dleftobj->HashCnt * HASH_LENB/8);*/
79 dleftobj
->FalsePositiveRatio
= exp2(-rBit
)/(double)ArraySize
;
80 dleftobj
->ItemInsideAll
=0;
81 dleftobj
->CellOverflowCount
=0;
82 dleftobj
->Item_rBitMask
=(uint128_t
)((1LLU<<rBit
)-1u) << CountBit
;
83 dleftobj
->Item_CountBitMask
=(1LLU<<CountBit
)-1u; // == maxCount
84 dleftobj
->Hash_ArrayBitMask
=(1LLU<<ArrayBit
)-1u;
85 dleftobj
->Hash_rBitMask
=(1LLU<<rBit
)-1u;
89 void fprintSDLAnfo(FILE *stream
, const SDLeftArray_t
* dleftobj
){
90 fprintf(stream
,"SDLA(%#zx) -> {\n\
91 Size:[r:%uB+cnt:%uB]*Item:%zd(%.3f~%uB)*subArray:%u = %g MiB\n\
92 Hash:%u*%uB ItemByte:%u MaxCountSeen:%lu%s\n\
93 Designed Capacity:%lu ItemCount:%lu, with Overflow:%lu\n\
94 FP:%g, estimated FP item count:%.10g\n\
98 dleftobj
->rBit
,dleftobj
->CountBit
,dleftobj
->ArraySize
,log2(dleftobj
->ArraySize
),
99 dleftobj
->ArrayBit
,dleftobj
->SubItemCount
,(double)dleftobj
->SubItemCount
*dleftobj
->itemByte
*dleftobj
->ArraySize
/1048576,
100 1,HASH_LENB
,dleftobj
->itemByte
,dleftobj
->maxCountSeen
,(dleftobj
->ItemInsideAll
)?"":"(=0, as SDLA is empty)",
101 dleftobj
->ArraySize
*(dleftobj
->SubItemCount
*3/4),
102 dleftobj
->ItemInsideAll
,dleftobj
->CellOverflowCount
,
103 dleftobj
->FalsePositiveRatio
,dleftobj
->ItemInsideAll
*dleftobj
->FalsePositiveRatio
,
106 fprintf(stream," Item_rBitMask:[%016lX %016lX]\n", (uint64_t)(dleftobj->Item_rBitMask>>64), (uint64_t)dleftobj->Item_rBitMask);
107 fprintf(stream," Item_CountBitMask:[%016lX]\n", dleftobj->Item_CountBitMask);
108 uint128_t check = dleftobj->Item_rBitMask + dleftobj->Item_CountBitMask;
109 fprintf(stream," Sum:[%016lX %016lX]\n", (uint64_t)(check>>64), (uint64_t)check);
110 fprintf(stream," Hash_ArrayBitMask:[%016lX]\n", dleftobj->Hash_ArrayBitMask);
111 fprintf(stream," Hash_rBitMask:[%016lX]\n", dleftobj->Hash_rBitMask);
113 fputs("}\n", stream
);
116 FORCE_INLINE
uint32_t rotl32 ( uint32_t x
, int8_t r
){
117 return (x
<< r
) | (x
>> (32 - r
));
120 // this is POP, thus the input *pdat will be changed/shifted.
121 // Max bits is the same as size_t
122 // this is a static function, bits has already been <= 8*sizeof(size_t)
123 // static function, thus the input parameters should be clean
124 FORCE_INLINE
uint64_t popLowestBits(unsigned char bits
, uint64_t *pdat
, uint_fast8_t *pdatLenu64t
){
125 //unsigned char lastBits = bits % (8*sizeof(size_t));
126 unsigned char nullBits
= 8*sizeof(uint64_t)-bits
;
127 //char MoreUnit = (bits-lastBits)/sizeof(size_t);
128 uint64_t bitMask
= (1LLU<<bits
)-1U;
129 uint64_t outUnit
= pdat
[0] & bitMask
;
130 //printf("[b]%d,%d,%016zx,%016zx\n",bits,nullBits,bitMask,outUnit);
132 for(i
=1;i
<*pdatLenu64t
;i
++){
133 uint64_t tmpUnit
= *(pdat
+i
) & bitMask
;
134 *(pdat
+i
-1) = (*(pdat
+i
-1) >> bits
) | (tmpUnit
<< nullBits
);
136 *(pdat
+i
-1) = *(pdat
+i
-1) >> bits
;
137 *pdatLenu64t
-= bits
/(8*sizeof(uint64_t));
138 //printf("[c][%016lx,%016lx]\n",pdat[0],pdat[1]);
142 void *searchSubArray(unsigned char* pChunk
){}
145 FORCE_INLINE
void incSDLArray(size_t ArrayBits
, uint64_t rBits
, SDLeftArray_t
*dleftobj
){
146 //size_t ArrayPos = ArrayBits % dleftobj->ArraySize;
147 size_t ArrayPos
= ((uint128_t
)ArrayBits
*(uint128_t
)dleftobj
->ArraySize
) >> dleftobj
->ArrayBit
;
148 size_t relAddr
= ArrayPos
*dleftobj
->SubItemCount
*dleftobj
->itemByte
;
149 unsigned char* pChunk
= (unsigned char*)dleftobj
->pDLA
+ relAddr
;
150 unsigned char* pEndChunk
= (unsigned char*)pChunk
+ dleftobj
->SubItemCount
*dleftobj
->itemByte
;
151 //unsigned char BoundaryByteRel = rBits % 8; // Well, I will not touch the egg-head ...
152 uint64_t Item_rBits
, Item_CountBits
;
157 unsigned char byte[16];
161 uint16_t SubItemByUnit
=dleftobj
->SubItemByUnit
;
162 unsigned char itemByte
=dleftobj
->itemByte
;
163 pthread_t
*ptWorkers
=malloc(sizeof(pthread_t
)*SubItemByUnit
);
164 for (uint16_t i
=0;i
<SubItemByUnit
;i
++) {
165 pthread_create(ptWorkers
+i
,NULL
,&searchSubArray
,pChunk
+i
*itemByte
);
168 for (uint16_t i
=0;i
<SubItemByUnit
;i
++) {
169 pthread_join(*(ptWorkers
+i
),NULL
);
174 while (pChunk
< pEndChunk
) {
177 for (uint_fast8_t i
=0;i
<dleftobj
->itemByte
;i
++) {
178 theItem
|= ((uint128_t
)*(pChunk
+i
)) << (i
*8u);
179 //theItem.byte[i] = *(pChunk+i);
181 #elif defined NEW /* faster for one less register shift operation for memory uint8_t */
183 for (uint_fast8_t i
=0;i
<dleftobj
->itemByte
;i
++) {
184 theItem
= theItem
<< 8u;
185 theItem
|= *(pChunk
+i
);
186 //theItem.byte[i] = *(pChunk+i);
188 #elif BYTE_ORDER == LITTLE_ENDIAN
189 theItem
= *(uint128_t
*)pChunk
;
191 #error Faster version is Little Endian, choose OLD or NEW to define !
193 Item_CountBits
= theItem
& dleftobj
->Item_CountBitMask
;
194 if (Item_CountBits
== 0) { // reaching the pre-end
196 ++dleftobj
->ItemInsideAll
;
197 //printf("<%lu>",dleftobj->ItemInsideAll);
200 Item_rBits
= (theItem
& dleftobj
->Item_rBitMask
) >> dleftobj
->CountBit
;
201 if (Item_rBits
== rBits
) {
202 if (Item_CountBits
< dleftobj
->Item_CountBitMask
) {
204 if (Item_CountBits
> dleftobj
->maxCountSeen
) {
205 dleftobj
->maxCountSeen
= Item_CountBits
;
207 fprintf(stderr
,"[sdlm][%zu][%lu]:[%lx],[%lu] [%016lx %016lx]\n",
208 ArrayPos
,dleftobj
->SubItemCount
-(pEndChunk
-pChunk
)/dleftobj
->itemByte
,rBits
,Item_CountBits
,(uint64_t)(theItem
>>64u),(uint64_t)theItem
);
210 } // if SDLA is not empty, maxCountSeen>=1, no need to check when Item_CountBits == 0.
213 ++dleftobj
->CountBitOverflow
;
217 pChunk
+= dleftobj
->itemByte
;
218 } // reaching the structure-end
219 if (pChunk
< pEndChunk
) {
220 //printf("Old:%zu[%lx %lx]<-[%lx][%lx][%lx %lx] ",(size_t)((char*)pChunk - (char*)dleftobj->pDLA)-relAddr,(uint64_t)(theItem>>64),(uint64_t)theItem,rBits,Item_CountBits,(uint64_t)((((uint128_t)rBits)<<dleftobj->CountBit)>>64),(uint64_t)(((uint128_t)rBits)<<dleftobj->CountBit));
222 theItem &= ~dleftobj->CountBit;
223 theItem |= Item_CountBits;
224 *(uint128_t*)pChunk = theItem;
226 theItem
= (((uint128_t
)rBits
)<<dleftobj
->CountBit
) | Item_CountBits
;
228 //printf("Old: %lx, %lx\n",(uint64_t)(theItem>>64u),(uint64_t)theItem);
229 pChunk
+= dleftobj
->itemByte
-1;
230 for (uint_fast8_t i
=0;i
<dleftobj
->itemByte
;i
++) {
231 *pChunk
-- = (uint8_t)(theItem
>>(i
*8u));
236 for (uint_fast8_t i=0;i<dleftobj->itemByte;i++) {
237 theItem = theItem << 8u;
238 theItem |= (uint128_t)*(pChunk+i);
239 //theItem.byte[i] = *(pChunk+i);
241 printf("New: %lx, %lx\n",(uint64_t)(theItem>>64u),(uint64_t)theItem);
243 #elif (BYTE_ORDER == LITTLE_ENDIAN) || (defined OLD)
244 for (uint_fast8_t i
=0;i
<dleftobj
->itemByte
;i
++) {
245 uint128_t tmpMask
= ((uint128_t
)0xffLLU
) << (i
*8u);
246 *pChunk
++ = (theItem
& tmpMask
)>>(i
*8u);
249 #error Faster version is Little Endian, choose OLD or NEW to define !
251 /*printf("New:%zu[%lx %lx] ",(size_t)((char*)pChunk - (char*)dleftobj->pDLA)-relAddr,(uint64_t)(theItem>>64),(uint64_t)theItem);
252 printf("Mem:%zu[",(size_t)((char*)pChunk - (char*)dleftobj->pDLA)-relAddr);
253 for (uint_fast8_t i=0;i<dleftobj->itemByte;i++) {
254 printf("%x ",*(pChunk-dleftobj->itemByte+i));
257 } else { // reaching the structure-end
258 ++dleftobj
->CellOverflowCount
;
262 // return 0 for not found
263 FORCE_INLINE
uint64_t querySDLArray(size_t ArrayBits
, uint64_t rBits
, SDLeftArray_t
*dleftobj
){
264 size_t ArrayPos
= ((uint128_t
)ArrayBits
*(uint128_t
)dleftobj
->ArraySize
) >> dleftobj
->ArrayBit
;
265 size_t relAddr
= ArrayPos
*dleftobj
->SubItemCount
*dleftobj
->itemByte
;
266 unsigned char* pChunk
= (unsigned char*)dleftobj
->pDLA
+ relAddr
;
267 unsigned char* pEndChunk
= (unsigned char*)pChunk
+ dleftobj
->SubItemCount
*dleftobj
->itemByte
;
269 uint64_t Item_CountBits
=0; // set value in case SubItemCount*dleftobj->itemByte == 0 (EP ?)
271 while (pChunk
< pEndChunk
) {
274 for (uint_fast8_t i
=0;i
<dleftobj
->itemByte
;i
++) {
275 theItem
|= ((uint128_t
)*(pChunk
+i
)) << (i
*8u);
277 #elif defined NEW /* faster for one less register shift operation for memory uint8_t */
279 for (uint_fast8_t i
=0;i
<dleftobj
->itemByte
;i
++) {
280 theItem
= theItem
<< 8u;
281 theItem
|= *(pChunk
+i
);
282 //theItem.byte[i] = *(pChunk+i);
284 #elif BYTE_ORDER == LITTLE_ENDIAN
285 theItem
= *(uint128_t
*)pChunk
;
287 #error Faster version is Little Endian, choose OLD or NEW to define !
289 Item_CountBits
= theItem
& dleftobj
->Item_CountBitMask
;
290 if (Item_CountBits
== 0) { // reaching the pre-end, not found
293 Item_rBits
= (theItem
& dleftobj
->Item_rBitMask
) >> dleftobj
->CountBit
;
294 if (Item_rBits
== rBits
) { // found
297 pChunk
+= dleftobj
->itemByte
;
299 if (pChunk
>= pEndChunk
) { // reaching the structure-end
302 return Item_CountBits
;
307 // return 0 for not found
308 void travelSDLArray(SDLeftArray_t
*dleftobj
, FILE *fpdat
){
312 uint64_t Item_CountBits
=0; // set value in case SubItemCount*dleftobj->itemByte == 0 (EP ?)
314 unsigned char* pChunk
= (unsigned char*)dleftobj
->pDLA
;
315 unsigned char* pEndChunk
= (unsigned char*)pChunk
+ dleftobj
->SDLAbyte
;
316 fputs("ItemNo,rBit\tCount\n",fpdat
);
317 while (pChunk
< pEndChunk
) {
318 #if BYTE_ORDER == LITTLE_ENDIAN
319 theItem
= *(uint128_t
*)pChunk
;
321 #error Faster version is Little Endian, choose OLD or NEW to define !
323 Item_CountBits
= theItem
& dleftobj
->Item_CountBitMask
;
324 Item_rBits
= (theItem
& dleftobj
->Item_rBitMask
) >> dleftobj
->CountBit
;
325 if (Item_CountBits
) {
326 fprintf(fpdat
, "%zx,%lX\t%lu\n",pChunk
, Item_rBits
, Item_CountBits
);
328 pChunk
+= dleftobj
->itemByte
;
335 FORCE_INLINE
int_fast8_t dleft_insert_kmer(const char *const kmer
, const size_t len
, SDLeftArray_t
*dleftobj
,
336 uint64_t **dibskmer
,size_t * const uint64cnt
) {
337 char* revcomkmer
= ChrSeqRevComp(kmer
,len
);
338 const char *const smallerkmer
= (strcmp(kmer
,revcomkmer
)<=0)?kmer
:revcomkmer
; // not strncmp since the first odd len bytes mush be different.
340 printf("[%zd]->[%s,%s,%s]\n",len
,kmer
,revcomkmer
,smallerkmer
);
342 size_t bytelen
= (len
+3u)/4;
343 size_t Ncount
= ChrSeq2dib(smallerkmer
,len
,dibskmer
,uint64cnt
);
344 //printf("%zd:%zd:[%016lx][%016lx]->[%s] (%zd)\n",*uint64cnt,bytelen,*dibskmer[0],*dibskmer[1],dib2basechr(*dibskmer,len),Ncount);
345 //uint64_t *ptmpout = dleftobj->outhash;
346 //for(uint_fast8_t i=0;i<dleftobj->HashCnt;i++){
347 //uint32_t seed=rotl32(0x3ab2ae35-i,i);
348 //MurmurHash3_x64_128(*dibskmer,bytelen,seed,ptmpout);
351 MurmurHash3_x64_128(*dibskmer
,bytelen
,HASHSEED
,dleftobj
->outhash
);
352 //printf("[%016lx,%016lx]\n",dleftobj->outhash[0],dleftobj->outhash[1]);
355 uint_fast8_t datLenu64t
= HASH_LENB
/(8*sizeof(uint64_t));
356 //printf("[x][%016lx,%016lx]\n",popLowestBits(60,dleftobj->outhash,&datLenu64t),popLowestBits(56,dleftobj->outhash,&datLenu64t));
357 uint64_t rBits
= popLowestBits(dleftobj
->rBit
,dleftobj
->outhash
,&datLenu64t
);
358 size_t ArrayBits
= popLowestBits(dleftobj
->ArrayBit
,dleftobj
->outhash
,&datLenu64t
);
359 incSDLArray(ArrayBits
, rBits
, dleftobj
);
365 size_t dleft_insert_read(unsigned int k
, char const *const inseq
, size_t len
, SDLeftArray_t
*dleftobj
) {
367 size_t insertedCount
=0;
368 uint64_t *dibskmer
=NULL
;
369 size_t uint64cnt
= 0;
370 for (size_t i
=0;i
<=len
-k
;i
++) {
371 if (dleft_insert_kmer(inseq
+i
,k
,dleftobj
,&dibskmer
,&uint64cnt
)) {
376 return insertedCount
;
379 void dleft_dump(const SDLeftArray_t
* const dleftobj
, SDLdumpHead
* const pSDLeftStat
, FILE *stream
){
380 rewind(stream
); // for Binary file, position is important.
381 strncpy(pSDLeftStat
->FileID
,"GDSD",4);
382 pSDLeftStat
->FileVersion
[0]=0u;
383 pSDLeftStat
->FileVersion
[1]=1u;
384 //pSDLeftStat->extreebyte = 0;
385 pSDLeftStat
->SubItemCount
=dleftobj
->SubItemCount
;
386 pSDLeftStat
->CountBit
=dleftobj
->CountBit
;
387 pSDLeftStat
->rBit
=dleftobj
->rBit
;
388 pSDLeftStat
->ArraySize
=dleftobj
->ArraySize
;
389 pSDLeftStat
->SDLAbyte
=dleftobj
->SDLAbyte
;
390 pSDLeftStat
->ItemInsideAll
=dleftobj
->ItemInsideAll
;
391 pSDLeftStat
->CellOverflowCount
=dleftobj
->CellOverflowCount
;
392 pSDLeftStat
->CountBitOverflow
=dleftobj
->CountBitOverflow
;
393 pSDLeftStat
->maxCountSeen
=dleftobj
->maxCountSeen
;
394 pSDLeftStat
->crc32c
=0xffffffff; //later
396 unitwritten
=fwrite(pSDLeftStat
,sizeof(SDLdumpHead
),1u,stream
);
397 if (unitwritten
!= 1)
398 err(EXIT_FAILURE
, "Fail to write dat file ! [%zd]",unitwritten
);
399 unitwritten
=fwrite(dleftobj
->pDLA
,pSDLeftStat
->SDLAbyte
,1u,stream
);
400 if (unitwritten
!= 1)
401 err(EXIT_FAILURE
, "Cannot write to dat file ! [%zd]",unitwritten
);
402 printf("Cb %x rB %x AS %lx Size %lx HMC %lx HMH %lx\n",pSDLeftStat
->CountBit
,pSDLeftStat
->rBit
,pSDLeftStat
->ArraySize
,pSDLeftStat
->SDLAbyte
,pSDLeftStat
->HistMaxCntVal
,pSDLeftStat
->HistMaxHistVal
);
405 void dleft_arraydestroy(SDLeftArray_t
* const dleftobj
){
406 free(dleftobj
->pDLA
);
407 //free(dleftobj->outhash);
412 SDLeftStat_t
* dleft_stat(SDLeftArray_t
* const dleftobj
, FILE *stream
, FILE *fpdat
) {
414 SDLeftStat_t
* dleft_stat(SDLeftArray_t
* const dleftobj
, FILE *stream
) {
416 SDLeftStat_t
*pSDLeftStat
= malloc(sizeof(SDLeftStat_t
));
417 uint64_t * const pCountHistArray
= calloc(sizeof(uint64_t),1+dleftobj
->maxCountSeen
);
418 size_t totalDLAsize
= dleftobj
->SubItemCount
* dleftobj
->itemByte
* dleftobj
->ArraySize
;
419 const unsigned char * const pDLA
= dleftobj
->pDLA
;
420 uint64_t Item_CountBits
=0; // set value in case SDLA_ITEMARRAY*dleftobj->itemByte == 0 (EP ?)
423 uint16_t SubItemCount
= dleftobj
->SubItemCount
;
424 uint64_t * const pCountSubArray
= calloc(sizeof(uint64_t),1+SubItemCount
);
425 uint64_t * const pCountSubNoOneArray
= calloc(sizeof(uint64_t),1+SubItemCount
);
426 uint16_t SubItemUsed
=0;
427 uint16_t SubItemUsedCountIsOne
=0;
428 uint64_t ArraySize
= dleftobj
->ArraySize
;
429 size_t mainArrayID
=0;
431 travelSDLArray(dleftobj
, fpdat
);
434 unitwritten=fwrite(&ArraySize,sizeof(uint64_t),1u,fpdat);
435 if (unitwritten != 1)
436 err(EXIT_FAILURE, "Fail to write dat file ! [%zd]",unitwritten);
439 for (size_t i
=0;i
<totalDLAsize
;i
+=dleftobj
->itemByte
) {
440 //const unsigned char * pChunk = pDLA + i;
443 for (uint_fast8_t j
=0;j
<dleftobj
->itemByte
;j
++) {
444 theItem
|= ((uint128_t
)*(pDLA
+ i
+ j
)) << (j
*8u);
448 for (uint_fast8_t j
=0;j
<dleftobj
->itemByte
;j
++) {
449 theItem
= theItem
<< 8u;
450 theItem
|= *(pDLA
+ i
+ j
);
452 #elif BYTE_ORDER == LITTLE_ENDIAN
453 theItem
= *(uint128_t
*)(pDLA
+i
);
455 #error Faster version is Little Endian, choose OLD or NEW to define !
457 Item_CountBits
= (uint64_t)theItem
& dleftobj
->Item_CountBitMask
; // Item_CountBitMask is uint64_t.
458 ++pCountHistArray
[Item_CountBits
];
461 if (Item_CountBits
) {
463 if (Item_CountBits
==1) ++SubItemUsedCountIsOne
;
465 if (!(mainArrayID
% SubItemCount
)) {
466 ++pCountSubArray
[SubItemUsed
];
467 ++pCountSubNoOneArray
[SubItemUsed
-SubItemUsedCountIsOne
];
468 fwrite(&SubItemUsed
,sizeof(uint8_t),1u,fpdat
); // Well, just write 1 byte each. (LE)
469 //printf("-%zd %u\n",mainArrayID,SubItemUsed);
471 SubItemUsedCountIsOne
=0;
472 } else { // mainArrayID mod SubItemCount != 0. mainArrayID == (i+1)/dleftobj->itemByte .
473 //printf("---%zd %% %d = %zd %d\n",mainArrayID,SubItemCount,mainArrayID % SubItemCount,SubItemUsed);
477 //THETYPE HistSum=0; // HistSum == dleftobj->ItemInsideAll
478 float128 HistSumSquared
=0.0;
479 double SStd
; // We need to return in a fixed type for printf
480 for (size_t p
=1;p
<=dleftobj
->maxCountSeen
;p
++) {
481 //HistSum += pCountHistArray[p];
482 HistSumSquared
+= pCountHistArray
[p
] * pCountHistArray
[p
];
484 //http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
485 SStd
= sqrtl( ( HistSumSquared
-((long double)dleftobj
->ItemInsideAll
*(long double)dleftobj
->ItemInsideAll
/(long double)dleftobj
->maxCountSeen
) ) / (long double)(dleftobj
->maxCountSeen
-1) );
486 pSDLeftStat
->HistSStd
= SStd
;
487 pSDLeftStat
->HistMean
= (double)dleftobj
->ItemInsideAll
/ (double)dleftobj
->maxCountSeen
;
488 pSDLeftStat
->HistMaxCntVal
= 1; //later
489 pSDLeftStat
->HistMaxHistVal
= 1; //later
490 fprintf(stream
,"#Kmer_real_count: %ld\n#Kmer_count_hist: %ld\n"
491 "#Kmer_depth_mean: %f\n#Kmer_depth_sStd: %f\n"
492 "#CountBit_overflow: %lu\n"
493 "\n#Kmer_frequence\tHist_value\tKmer_count\tHist_ratio\n",
494 dleftobj
->ItemInsideAll
,dleftobj
->maxCountSeen
,pSDLeftStat
->HistMean
,SStd
,dleftobj
->CountBitOverflow
);
495 for (size_t p
=1;p
<=dleftobj
->maxCountSeen
;p
++) {
496 fprintf(stream
,"%zu\t%lu\t%lu\t%g\n",p
,(uint64_t)pCountHistArray
[p
],
497 (uint64_t)pCountHistArray
[p
]*(uint64_t)p
,(double)pCountHistArray
[p
]/(double)dleftobj
->ItemInsideAll
);
499 free(pCountHistArray
);
502 fprintf(stream
,"#-----------------------------------------------\n\n#Array_size: %lu\n\n"
503 "SubArrayFilled\tCount\tCount_without_1\n",ArraySize
);
504 for (uint16_t i
=0;i
<=SubItemCount
;i
++) {
505 fprintf(stream
,"%u\t%lu, %g\t%lu, %g\n",i
,pCountSubArray
[i
],(double)pCountSubArray
[i
]/(double)ArraySize
,pCountSubNoOneArray
[i
],(double)pCountSubNoOneArray
[i
]/(double)ArraySize
);
507 free(pCountSubArray
);
513 Speed test with OLD 2bitseqinline.h:
514 ./readscorr Saccharomyces_cerevisiae.cfg -o ttmp 2> ttmp.log
517 User: 170.445088(s), System: 0.400939(s). Real: 174.093140(s).
518 Sleep: 3.247113(s). Block I/O times: 0/0. MaxRSS: 0 kiB.
519 Wait(s): 62(nvcsw) + 54770(nivcsw). Page Fault(s): 41294(minflt) + 0(majflt).
522 User: 204.444919(s), System: 2.459626(s). Real: 215.708506(s).
523 Sleep: 8.803961(s). Block I/O times: 0/0. MaxRSS: 0 kiB.
524 Wait(s): 143(nvcsw) + 58522(nivcsw). Page Fault(s): 41294(minflt) + 0(majflt).
527 User: 133.809657(s), System: 1.160823(s). Real: 137.159722(s).
528 Sleep: 2.189242(s). Block I/O times: 0/0. MaxRSS: 0 kiB.
529 Wait(s): 64(nvcsw) + 82900(nivcsw). Page Fault(s): 41301(minflt) + 0(majflt).
533 zz <- file("oSCa8ms64r25k17.dat", "rb")
534 l=readBin(zz,"integer",size=8,signed=F)
535 a=readBin(zz,"integer",l,size=1,signed=F)
536 dim(a)<-c(l/8192,8192)
538 png("oSCa8ms64r25k17.png",2048,600)
539 plot(x=1:length(b),y=b)