4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
31 #define PROTECTED_FUNC
33 #define EDGEIDBLOCKSIZE 10000
36 static const kmer_t2 empty_kmer2
= {{0, 0, 0, 0}, 0, 0, 0, 0};
38 static inline ubyte8
modular ( KmerSet2
* set
, Kmer seq
)
41 temp
= ( seq
.high1
% set
->size
) << 32 | ( seq
.low1
>> 32 & 0xffffffff );
42 temp
= ( temp
% set
->size
) << 32 | ( seq
.low1
& 0xffffffff );
43 temp
= ( temp
% set
->size
) << 32 | ( seq
.high2
>> 32 & 0xffffffff );
44 temp
= ( temp
% set
->size
) << 32 | ( seq
.high2
& 0xffffffff );
45 temp
= ( temp
% set
->size
) << 32 | ( seq
.low2
>> 32 & 0xffffffff );
46 temp
= ( temp
% set
->size
) << 32 | ( seq
.low2
& 0xffffffff );
47 temp
= ( ubyte8
) ( temp
% set
->size
);
51 static const kmer_t2 empty_kmer2
= {{0, 0}, 0, 0, 0, 0};
52 static inline ubyte8
modular ( KmerSet2
* set
, Kmer seq
)
56 temp
= Kmer2int128 ( seq
);
57 hc
= temp
% set
->size
;
62 /*************************************************
66 Updates the id and flag of the kmer.
68 1. mer: the current kmer
70 3. flag: the type of kmer(00: big "to kmer"; 01: big "from kmer"; 10: small "to kmer"; 11: small "from kmer")
75 *************************************************/
76 PUBLIC_FUNC
void update_kmer2 ( kmer_t2
* mer
, int id
, char flag
)
78 struct edgeID
* edgeid
;
79 edgeid
= ( struct edgeID
* ) malloc ( sizeof ( struct edgeID
) );
85 { edgeid
->next
= mer
->edgeId
; }
91 /*************************************************
95 Initializes the new kmer.
97 1. mer: the point of the new kmer
98 2. seq: the sequence of the kmer
100 4. flag: the type of kmer(00: big "to kmer"; 01: big "from kmer"; 10: small "to kmer"; 11: small "from kmer")
105 *************************************************/
106 PUBLIC_FUNC
void set_new_kmer2 ( kmer_t2
* mer
, Kmer seq
, int id
, char flag
)
109 set_kmer_seq ( *mer
, seq
);
110 update_kmer2 ( mer
, id
, flag
);
113 //Whether it's a prime number.
114 static inline int is_prime_kh ( ubyte8 num
)
118 if ( num
< 4 ) { return 1; }
120 if ( num
% 2 == 0 ) { return 0; }
122 max
= ( ubyte8
) sqrt ( ( float ) num
);
124 for ( i
= 3; i
< max
; i
+= 2 ) { if ( num
% i
== 0 ) { return 0; } }
129 //Find next prime number.
130 static inline ubyte8
find_next_prime_kh ( ubyte8 num
)
132 if ( num
% 2 == 0 ) { num
++; }
134 while ( 1 ) { if ( is_prime_kh ( num
) ) { return num
; } num
+= 2; }
137 /*************************************************
141 Initializes the kmerset.
143 1. init_size: the initial size of the kmerset
144 2. load_factor: load factor of hash
149 *************************************************/
150 PUBLIC_FUNC KmerSet2
* init_kmerset2 ( ubyte8 init_size
, float load_factor
)
154 if ( init_size
< 3 ) { init_size
= 3; }
155 else { init_size
= find_next_prime_kh ( init_size
); }
157 set
= ( KmerSet2
* ) malloc ( sizeof ( KmerSet2
) );
158 set
->size
= init_size
;
160 set
->max
= set
->size
* load_factor
;
162 if ( load_factor
<= 0 ) { load_factor
= 0.25f
; }
163 else if ( load_factor
>= 1 ) { load_factor
= 0.75f
; }
165 set
->load_factor
= load_factor
;
167 set
->array
= calloc ( set
->size
, sizeof ( kmer_t2
) );
168 set
->flags
= malloc ( ( set
->size
+ 15 ) / 16 * 4 );
169 memset ( set
->flags
, 0x55, ( set
->size
+ 15 ) / 16 * 4 );
173 PROTECTED_FUNC
static inline ubyte8
get_kmerset2 ( KmerSet2
* set
, Kmer seq
)
176 // hc = modular (set, seq);
178 hc
= modular ( set
, seq
);
181 temp
= Kmer2int128 ( seq
);
182 hc
= temp
% set
->size
;
187 if ( is_kmer_entity_null ( set
->flags
, hc
) )
193 if ( KmerEqual ( get_kmer_seq ( set
->array
[hc
] ), seq
) )
199 if ( hc
== set
->size
) { hc
= 0; }
205 /*************************************************
209 Search kmer in kmerset.
213 3. rs: the record node
218 *************************************************/
219 PUBLIC_FUNC
int search_kmerset2 ( KmerSet2
* set
, Kmer seq
, kmer_t2
** rs
)
222 // hc = modular (set, seq);
224 hc
= modular ( set
, seq
);
227 temp
= Kmer2int128 ( seq
);
228 hc
= temp
% set
->size
;
233 if ( is_kmer_entity_null ( set
->flags
, hc
) )
239 if ( KmerEqual ( get_kmer_seq ( set
->array
[hc
] ), seq
) )
241 *rs
= set
->array
+ hc
;
248 if ( hc
== set
->size
) { hc
= 0; }
254 /*************************************************
258 Whether the kmer exists in kmerset.
266 *************************************************/
267 PUBLIC_FUNC
static inline int exists_kmerset ( KmerSet2
* set
, Kmer seq
)
270 idx
= get_kmerset2 ( set
, seq
);
271 return !is_kmer_entity_null ( set
->flags
, idx
);
274 /*************************************************
278 Enlarges the kmerset if necessary.
280 1. set: the hash of kmerset
281 2. num: the element number add to kmerset
286 *************************************************/
287 PROTECTED_FUNC
static inline void encap_kmerset2 ( KmerSet2
* set
, ubyte8 num
)
290 ubyte8 i
, n
, size
, hc
;
293 if ( set
->count
+ num
<= set
->max
) { return; }
295 if ( initKmerSetSize
!= 0 )
297 if ( set
->load_factor
< 0.88 )
299 set
->load_factor
= 0.88;
300 set
->max
= set
->size
* set
->load_factor
;
305 fprintf ( stderr
, "-- Static memory pool exploded, please define a larger value. --\nloadFactor\t%f\nsize\t%llu\ncnt\t%llu\n", set
->load_factor
, set
->size
, set
->count
);
314 if ( n
< 0xFFFFFFFU
)
319 n
= find_next_prime_kh ( n
);
321 while ( n
* set
->load_factor
< set
->count
+ num
);
323 set
->array
= realloc ( set
->array
, n
* sizeof ( kmer_t2
) );
325 if ( set
->array
== NULL
)
327 fprintf ( stderr
, "-- Out of memory --\n" );
331 flags
= malloc ( ( n
+ 15 ) / 16 * 4 );
332 memset ( flags
, 0x55, ( n
+ 15 ) / 16 * 4 );
335 set
->max
= n
* set
->load_factor
;
341 for ( i
= 0; i
< size
; i
++ )
343 if ( !exists_kmer_entity ( flags
, i
) ) { continue; }
346 set_kmer_entity_del ( flags
, i
);
350 hc
= modular ( set
, get_kmer_seq ( key
) );
352 hc
= modular ( set
, get_kmer_seq ( key
) );
354 temp
= Kmer2int128 ( get_kmer_seq ( key
) );
355 hc
= temp
% set
->size
;
358 while ( !is_kmer_entity_null ( set
->flags
, hc
) ) { hc
++; if ( hc
== set
->size
) { hc
= 0; } }
360 clear_kmer_entity_null ( set
->flags
, hc
);
362 if ( hc
< size
&& exists_kmer_entity ( flags
, hc
) )
365 key
= set
->array
[hc
];
366 set
->array
[hc
] = tmp
;
367 set_kmer_entity_del ( flags
, hc
);
371 set
->array
[hc
] = key
;
380 /*************************************************
384 Puts kmer into the kmerset.
386 1. set: the hash of kmerset
387 2. seq: the sequence of the kmer
388 3. id: the id of edge
389 4. flag: the type of kmer(00: big "to kmer"; 01: big "from kmer"; 10: small "to kmer"; 11: small "from kmer")
390 5. kmer_p: used to record the info of the kmer
394 1 if it's successfully put kmer into kmerset.
395 *************************************************/
396 PUBLIC_FUNC
int put_kmerset2 ( KmerSet2
* set
, Kmer seq
, int id
, char flag
, kmer_t2
** kmer_p
)
400 if ( set
->count
+ 1 > set
->max
)
402 encap_kmerset2 ( set
, 1 );
405 // hc = modular (set, seq);
407 hc
= modular ( set
, seq
);
410 temp
= Kmer2int128 ( seq
);
411 hc
= temp
% set
->size
;
416 if ( is_kmer_entity_null ( set
->flags
, hc
) )
418 clear_kmer_entity_null ( set
->flags
, hc
);
419 set_new_kmer2 ( set
->array
+ hc
, seq
, id
, flag
);
421 *kmer_p
= set
->array
+ hc
;
426 if ( KmerEqual ( get_kmer_seq ( set
->array
[hc
] ), seq
) )
428 update_kmer2 ( set
->array
+ hc
, id
, flag
);
429 *kmer_p
= set
->array
+ hc
;
436 if ( hc
== set
->size
) { hc
= 0; }
444 /*************************************************
448 Returns the kmer number of the kmerset.
454 The kmer number of the kmerset.
455 *************************************************/
456 PUBLIC_FUNC byte8
count_kmerset2 ( KmerSet2
* set
) { return set
->count
; }
458 PUBLIC_FUNC
static inline void reset_iter_kmerset2 ( KmerSet2
* set
) { set
->iter_ptr
= 0; }
460 PUBLIC_FUNC
static inline ubyte8
iter_kmerset2 ( KmerSet2
* set
, kmer_t2
** rs
)
462 while ( set
->iter_ptr
< set
->size
)
464 if ( !is_kmer_entity_null ( set
->flags
, set
->iter_ptr
) )
466 *rs
= set
->array
+ set
->iter_ptr
;
478 PUBLIC_FUNC
void free_kmerset2 ( KmerSet2
* set
)
481 struct edgeID
* temp
, *temp_next
;
485 while ( set
->iter_ptr
< set
->size
)
487 if ( !is_kmer_entity_null ( set
->flags
, set
->iter_ptr
) )
489 node
= set
->array
+ set
->iter_ptr
;
497 temp_next
= temp
->next
;
498 free ( ( void * ) temp
);
515 /*************************************************
519 Free all the kmersets.
521 1. sets: the array of the kmerset
522 2. num: the number of kmerset
527 *************************************************/
528 PUBLIC_FUNC
void free_Sets2 ( KmerSet2
** sets
, int num
)
532 for ( i
= 0; i
< num
; ++i
)
534 free_kmerset2 ( sets
[i
] );
538 free ( ( void * ) sets
);