4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
30 static long long * tips
;
31 static long long * kmerCounter
; //kmer number for each KmerSet[thread id]
33 static long long ** kmerFreq
;
35 //buffer related varibles for chop kmer
37 static char ** rcSeq
; //sequence pointer for each thread
38 static char ** seqBuffer
; //read buffer
39 static int * lenBuffer
; //read length buffer
40 static int * indexArray
; // the read's begin kmer 's kmer_c
42 //buffer related varibles for splay tree
43 static int buffer_size
= 100000000; //buffer size for kmerBuffer...
44 static volatile int kmer_c
; // kmer count number
45 static Kmer
* kmerBuffer
; //kmer buffer array
46 static ubyte8
* hashBanBuffer
; //the buffered hash value for 'kmerBuffer'
47 static char * nextcBuffer
, *prevcBuffer
; //next char buffer , previous char buffer for 'kmerBuffer'
49 static struct aiocb aio1
;
50 static struct aiocb aio2
;
51 static char * aioBuffer1
;
52 static char * aioBuffer2
;
53 static char * readBuffer1
;
54 static char * readBuffer2
;
56 static void thread_mark ( KmerSet
* set
, unsigned char thrdID
);
57 static void Mark1in1outNode ( unsigned char * thrdSignal
);
58 static void thread_delow ( KmerSet
* set
, unsigned char thrdID
);
59 static void deLowCov ( unsigned char * thrdSignal
);
61 static void singleKmer ( int t
, KmerSet
* kset
);
62 static void chopKmer4read ( int t
, int threadID
);
64 static void freqStat ( char * outfile
);
66 static void threadRoutine ( void * para
)
71 prm
= ( PARAMETER
* ) para
;
74 //printf("%dth thread with threadID %d, hash_table %p\n",id,prm.threadID,prm.hash_table);
77 if ( * ( prm
->selfSignal
) == 1 )
79 for ( i
= 0; i
< kmer_c
; i
++ )
81 //if((unsigned char)(magic_seq(hashBanBuffer[i])%thrd_num)!=id)
82 //if((kmerBuffer[i]%thrd_num)!=id)
83 if ( ( hashBanBuffer
[i
] % thrd_num
) != id
)
88 kmerCounter
[id
+ 1]++;
89 singleKmer ( i
, KmerSets
[id
] );
92 * ( prm
->selfSignal
) = 0;
94 else if ( * ( prm
->selfSignal
) == 2 )
96 for ( i
= 0; i
< read_c
; i
++ )
98 if ( i
% thrd_num
!= id
)
103 chopKmer4read ( i
, id
+ 1 );
106 * ( prm
->selfSignal
) = 0;
108 else if ( * ( prm
->selfSignal
) == 3 )
110 * ( prm
->selfSignal
) = 0;
113 else if ( * ( prm
->selfSignal
) == 4 )
115 thread_mark ( KmerSets
[id
], id
);
116 * ( prm
->selfSignal
) = 0;
118 else if ( * ( prm
->selfSignal
) == 5 )
120 thread_delow ( KmerSets
[id
], id
);
121 * ( prm
->selfSignal
) = 0;
128 static void singleKmer ( int t
, KmerSet
* kset
)
131 put_kmerset ( kset
, kmerBuffer
[t
], prevcBuffer
[t
], nextcBuffer
[t
], &pos
);
134 static void creatThrds ( pthread_t
* threads
, PARAMETER
* paras
)
139 for ( i
= 0; i
< thrd_num
; i
++ )
141 //printf("to create %dth thread\n",(*(char *)&(threadID[i])));
142 if ( ( temp
= pthread_create ( &threads
[i
], NULL
, ( void * ) threadRoutine
, & ( paras
[i
] ) ) ) != 0 )
144 fprintf ( stderr
, "Create threads failed.\n" );
149 fprintf ( stderr
, "%d thread(s) initialized.\n", thrd_num
);
152 static void thread_wait ( pthread_t
* threads
)
156 for ( i
= 0; i
< thrd_num
; i
++ )
157 if ( threads
[i
] != 0 )
159 pthread_join ( threads
[i
], NULL
);
163 static void chopKmer4read ( int t
, int threadID
)
165 char * src_seq
= seqBuffer
[t
];
166 char * bal_seq
= rcSeq
[threadID
];
167 int len_seq
= lenBuffer
[t
];
169 ubyte8 hash_ban
, bal_hash_ban
;
174 word
.high1
= word
.low1
= word
.high2
= word
.low2
= 0;
176 for ( index
= 0; index
< overlaplen
; index
++ )
178 word
= KmerLeftBitMoveBy2 ( word
);
179 word
.low2
|= src_seq
[index
];
183 word
.high
= word
.low
= 0;
185 for ( index
= 0; index
< overlaplen
; index
++ )
187 word
= KmerLeftBitMoveBy2 ( word
);
188 word
.low
|= src_seq
[index
];
192 reverseComplementSeq ( src_seq
, len_seq
, bal_seq
);
193 // complementary node
194 bal_word
= reverseComplement ( word
, overlaplen
);
195 bal_j
= len_seq
- overlaplen
;
196 index
= indexArray
[t
];
198 if ( KmerSmaller ( word
, bal_word
) )
200 hash_ban
= hash_kmer ( word
);
201 hashBanBuffer
[index
] = hash_ban
;
202 kmerBuffer
[index
] = word
;
203 prevcBuffer
[index
] = InvalidCh
;
204 nextcBuffer
[index
++] = src_seq
[0 + overlaplen
];
208 bal_hash_ban
= hash_kmer ( bal_word
);
209 hashBanBuffer
[index
] = bal_hash_ban
;
210 kmerBuffer
[index
] = bal_word
;
211 prevcBuffer
[index
] = bal_seq
[bal_j
- 1];
212 nextcBuffer
[index
++] = InvalidCh
;
215 for ( j
= 1; j
<= len_seq
- overlaplen
; j
++ )
217 word
= nextKmer ( word
, src_seq
[j
- 1 + overlaplen
] );
218 bal_j
= len_seq
- j
- overlaplen
;
219 bal_word
= prevKmer ( bal_word
, bal_seq
[bal_j
] );
221 if ( KmerSmaller ( word
, bal_word
) )
223 hash_ban
= hash_kmer ( word
);
224 hashBanBuffer
[index
] = hash_ban
;
225 kmerBuffer
[index
] = word
;
226 prevcBuffer
[index
] = src_seq
[j
- 1];
228 if ( j
< len_seq
- overlaplen
)
230 nextcBuffer
[index
++] = src_seq
[j
+ overlaplen
];
234 nextcBuffer
[index
++] = InvalidCh
;
237 //printf("%dth: %p with %p\n",kmer_c-1,word,hashBanBuffer[kmer_c-1]);
241 // complementary node
242 bal_hash_ban
= hash_kmer ( bal_word
);
243 hashBanBuffer
[index
] = bal_hash_ban
;
244 kmerBuffer
[index
] = bal_word
;
248 prevcBuffer
[index
] = bal_seq
[bal_j
- 1];
252 prevcBuffer
[index
] = InvalidCh
;
255 nextcBuffer
[index
++] = bal_seq
[bal_j
+ overlaplen
];
256 //printf("%dth: %p with %p\n",kmer_c-1,bal_word,hashBanBuffer[kmer_c-1]);
261 static void sendWorkSignal ( unsigned char SIG
, unsigned char * thrdSignals
)
265 for ( t
= 0; t
< thrd_num
; t
++ )
267 thrdSignals
[t
+ 1] = SIG
;
274 for ( t
= 0; t
< thrd_num
; t
++ )
275 if ( thrdSignals
[t
+ 1] )
287 /*************************************************
291 1. Imports the reads from the lib file one by one.
292 2. Chops the reads into kmers and store them in KmerSets.
293 3. Removes the kmers with low coverage.
294 4. Marks the linear kmers.
295 5. Counts the kmer frequences.
297 1. libfile : the reads config file
298 2. outfile : the output file prefix
303 *************************************************/
304 boolean
prlRead2HashTable ( char * libfile
, char * outfile
)
308 unsigned char asm_ctg
= 1;
310 char * next_name
, name
[256];
312 time_t start_t
, stop_t
;
315 pthread_t threads
[thrd_num
];
316 unsigned char thrdSignal
[thrd_num
+ 1];
317 PARAMETER paras
[thrd_num
];
318 boolean flag
, pairs
= 0;
319 WORDFILTER
= createFilter ( overlaplen
);
322 scan_libInfo ( libfile
);
323 alloc_pe_mem ( num_libs
);
330 if ( gLineLen
< maxReadLen
)
332 gStr
= ( char * ) ckalloc ( ( maxReadLen
+ 1 ) * sizeof ( char ) );
336 maxReadLen4all
= maxReadLen
;
337 fprintf ( stderr
, "In %s, %d lib(s), maximum read length %d, maximum name length %d.\n\n", libfile
, num_libs
, maxReadLen
, maxNameLen
);
338 next_name
= ( char * ) ckalloc ( ( maxNameLen
+ 1 ) * sizeof ( char ) );
339 kmerBuffer
= ( Kmer
* ) ckalloc ( buffer_size
* sizeof ( Kmer
) );
340 hashBanBuffer
= ( ubyte8
* ) ckalloc ( buffer_size
* sizeof ( ubyte8
) );
341 prevcBuffer
= ( char * ) ckalloc ( buffer_size
* sizeof ( char ) );
342 nextcBuffer
= ( char * ) ckalloc ( buffer_size
* sizeof ( char ) );
343 maxReadNum
= buffer_size
/ ( maxReadLen
- overlaplen
+ 1 );
344 //printf("buffer size %d, max read len %d, max read num %d\n",buffer_size,maxReadLen,maxReadNum);
345 int maxAIOSize
= 32768;
346 aioBuffer1
= ( char * ) ckalloc ( ( maxAIOSize
) * sizeof ( char ) );
347 aioBuffer2
= ( char * ) ckalloc ( ( maxAIOSize
) * sizeof ( char ) );
348 readBuffer1
= ( char * ) ckalloc ( ( maxAIOSize
+ ( maxReadLen
* 4 + 1024 ) ) * sizeof ( char ) ); //(char *)ckalloc(maxAIOSize*sizeof(char)); //1024
349 readBuffer2
= ( char * ) ckalloc ( ( maxAIOSize
+ ( maxReadLen
* 4 + 1024 ) ) * sizeof ( char ) ); //1024
350 cach1
= ( char * ) ckalloc ( ( maxReadLen
* 4 + 1024 ) * sizeof ( char ) ); //1024
351 cach2
= ( char * ) ckalloc ( ( maxReadLen
* 4 + 1024 ) * sizeof ( char ) ); //1024
352 memset ( cach1
, '\0', ( maxReadLen
* 4 + 1024 ) ); //1024
353 memset ( cach2
, '\0', ( maxReadLen
* 4 + 1024 ) ); //1024
354 seqBuffer
= ( char ** ) ckalloc ( maxReadNum
* sizeof ( char * ) );
355 lenBuffer
= ( int * ) ckalloc ( maxReadNum
* sizeof ( int ) );
356 indexArray
= ( int * ) ckalloc ( maxReadNum
* sizeof ( int ) );
358 for ( i
= 0; i
< maxReadNum
; i
++ )
360 seqBuffer
[i
] = ( char * ) ckalloc ( maxReadLen
* sizeof ( char ) );
363 rcSeq
= ( char ** ) ckalloc ( ( thrd_num
+ 1 ) * sizeof ( char * ) );
367 kmerCounter
= ( long long * ) ckalloc ( ( thrd_num
+ 1 ) * sizeof ( long long ) );
368 KmerSets
= ( KmerSet
** ) ckalloc ( thrd_num
* sizeof ( KmerSet
* ) );
369 ubyte8 init_size
= 1024;
372 if ( initKmerSetSize
)
375 init_size
= ( ubyte8
) ( ( double ) initKmerSetSize
* 1024.0f
* 1024.0f
* 1024.0f
/ ( double ) thrd_num
/ 40 );
377 init_size
= ( ubyte8
) ( ( double ) initKmerSetSize
* 1024.0f
* 1024.0f
* 1024.0f
/ ( double ) thrd_num
/ 24 ); //is it true?
384 while ( k
* 0xFFFFFFLLU
< init_size
);
387 for ( i
= 0; i
< thrd_num
; i
++ )
389 //KmerSets[i] = init_kmerset(1024,0.77f);
390 KmerSets
[i
] = init_kmerset ( ( ( initKmerSetSize
) ? ( k
* 0xFFFFFFLLU
) : ( init_size
) ), 0.77f
);
391 thrdSignal
[i
+ 1] = 0;
392 paras
[i
].threadID
= i
;
393 paras
[i
].mainSignal
= &thrdSignal
[0];
394 paras
[i
].selfSignal
= &thrdSignal
[i
+ 1];
395 kmerCounter
[i
+ 1] = 0;
396 rcSeq
[i
+ 1] = ( char * ) ckalloc ( maxReadLen
* sizeof ( char ) );
399 creatThrds ( threads
, paras
);
402 thrdSignal
[0] = kmerCounter
[0] = 0;
404 kmer_c
= n_solexa
= read_c
= i
= libNo
= readNumBack
= gradsCounter
= 0;
406 while ( openNextFile ( &libNo
, pairs
, asm_ctg
) )
409 if ( lib_array
[libNo
].curr_type
== 4 )
411 int type
= 0; //deside the PE reads is good or bad
413 while ( ( flag
= read1seqInLibBam ( seqBuffer
[read_c
], next_name
, & ( lenBuffer
[read_c
] ), &libNo
, pairs
, 1, &type
) ) != 0 )
415 if ( type
== -1 ) //if the reads is bad, go back.
419 if ( lenBuffer
[read_c
- 1] >= overlaplen
+ 1 )
421 kmer_c
-= lenBuffer
[read_c
- 1] - overlaplen
+ 1;
429 if ( ( ++i
) % 100000000 == 0 )
430 { fprintf ( stderr
, "--- %lldth reads.\n", i
); }
432 if ( lenBuffer
[read_c
] < 0 )
433 { fprintf ( stderr
, "Read len %d.\n", lenBuffer
[read_c
] ); }
435 if ( lenBuffer
[read_c
] < overlaplen
+ 1 )
439 if(lenBuffer[read_c]>70)
440 lenBuffer[read_c] = 50;
441 else if(lenBuffer[read_c]>40)
442 lenBuffer[read_c] = 40;
444 indexArray
[read_c
] = kmer_c
;
445 kmer_c
+= lenBuffer
[read_c
] - overlaplen
+ 1;
448 if ( read_c
== maxReadNum
)
450 kmerCounter
[0] += kmer_c
;
451 sendWorkSignal ( 2, thrdSignal
); //chopKmer4read
452 sendWorkSignal ( 1, thrdSignal
); //singleKmer
457 //read PE fasta or fastq
458 else if ( lib_array
[libNo
].curr_type
== 1 || lib_array
[libNo
].curr_type
== 2 )
460 initAIO ( &aio1
, aioBuffer1
, fileno ( lib_array
[libNo
].fp1
), maxAIOSize
);
461 initAIO ( &aio2
, aioBuffer2
, fileno ( lib_array
[libNo
].fp2
), maxAIOSize
);
462 int offset1
, offset2
, flag1
, flag2
, rt1
, rt2
;
463 offset1
= offset2
= 0;
464 rt1
= aio_read ( &aio1
);
465 rt2
= aio_read ( &aio2
);
466 flag1
= AIORead ( &aio1
, &offset1
, readBuffer1
, cach1
, &rt1
, lib_array
[libNo
].curr_type
);
467 flag2
= AIORead ( &aio2
, &offset2
, readBuffer2
, cach2
, &rt2
, lib_array
[libNo
].curr_type
);
469 if ( flag1
&& flag2
)
471 int start1
, start2
, turn
;
475 while ( start1
< offset1
|| start2
< offset2
)
480 readseqInLib ( seqBuffer
[read_c
], next_name
, & ( lenBuffer
[read_c
] ), readBuffer1
, &start1
, offset1
, libNo
);
482 if ( ( ++i
) % 100000000 == 0 )
483 { fprintf ( stderr
, "--- %lldth reads.\n", i
); }
485 if ( lenBuffer
[read_c
] < 0 )
486 { fprintf ( stderr
, "Read len %d.\n", lenBuffer
[read_c
] ); }
488 if ( lenBuffer
[read_c
] < overlaplen
+ 1 )
490 if ( start1
>= offset1
)
494 flag1
= AIORead ( &aio1
, &offset1
, readBuffer1
, cach1
, &rt1
, lib_array
[libNo
].curr_type
);
500 indexArray
[read_c
] = kmer_c
;
501 kmer_c
+= lenBuffer
[read_c
] - overlaplen
+ 1;
504 if ( start1
>= offset1
)
508 flag1
= AIORead ( &aio1
, &offset1
, readBuffer1
, cach1
, &rt1
, lib_array
[libNo
].curr_type
);
511 if ( read_c
== maxReadNum
)
513 kmerCounter
[0] += kmer_c
;
514 sendWorkSignal ( 2, thrdSignal
); //chopKmer4read
515 sendWorkSignal ( 1, thrdSignal
); //singleKmer
525 readseqInLib ( seqBuffer
[read_c
], next_name
, & ( lenBuffer
[read_c
] ), readBuffer2
, &start2
, offset2
, libNo
);
527 if ( ( ++i
) % 100000000 == 0 )
528 { fprintf ( stderr
, "--- %lldth reads.\n", i
); }
530 if ( lenBuffer
[read_c
] < 0 )
531 { fprintf ( stderr
, "Read len %d.\n", lenBuffer
[read_c
] ); }
533 if ( lenBuffer
[read_c
] < overlaplen
+ 1 )
535 if ( ( flag2
== 2 ) && ( start2
>= offset2
) )
538 if ( start2
>= offset2
)
542 flag2
= AIORead ( &aio2
, &offset2
, readBuffer2
, cach2
, &rt2
, lib_array
[libNo
].curr_type
);
548 indexArray
[read_c
] = kmer_c
;
549 kmer_c
+= lenBuffer
[read_c
] - overlaplen
+ 1;
552 if ( ( flag2
== 2 ) && ( start2
>= offset2
) )
555 if ( start2
>= offset2
)
559 flag2
= AIORead ( &aio2
, &offset2
, readBuffer2
, cach2
, &rt2
, lib_array
[libNo
].curr_type
);
562 if ( read_c
== maxReadNum
)
564 kmerCounter
[0] += kmer_c
;
565 sendWorkSignal ( 2, thrdSignal
); //chopKmer4read
566 sendWorkSignal ( 1, thrdSignal
); //singleKmer
576 fprintf(stderr
, "Error: aio_read error.\n");
579 //read single fasta, single fastq and PE fasta in one file
582 initAIO ( &aio1
, aioBuffer1
, fileno ( lib_array
[libNo
].fp1
), maxAIOSize
);
583 int offset
, flag1
, rt
;
585 rt
= aio_read ( &aio1
);
587 while ( ( flag1
= AIORead ( &aio1
, &offset
, readBuffer1
, cach1
, &rt
, lib_array
[libNo
].curr_type
) ) )
591 while ( start
< offset
)
593 readseqInLib ( seqBuffer
[read_c
], next_name
, & ( lenBuffer
[read_c
] ), readBuffer1
, &start
, offset
, libNo
);
595 if ( ( ++i
) % 100000000 == 0 )
596 { fprintf ( stderr
, "--- %lldth reads.\n", i
); }
598 if ( lenBuffer
[read_c
] < 0 )
599 { fprintf ( stderr
, "Read len %d.\n", lenBuffer
[read_c
] ); }
601 if ( lenBuffer
[read_c
] < overlaplen
+ 1 )
604 indexArray
[read_c
] = kmer_c
;
605 kmer_c
+= lenBuffer
[read_c
] - overlaplen
+ 1;
609 if ( read_c
> maxReadNum
- 1024 )
611 kmerCounter
[0] += kmer_c
;
612 sendWorkSignal ( 2, thrdSignal
); //chopKmer4read
613 sendWorkSignal ( 1, thrdSignal
); //singleKmer
625 kmerCounter
[0] += kmer_c
;
626 sendWorkSignal ( 2, thrdSignal
); //chopKmer4read
627 sendWorkSignal ( 1, thrdSignal
); //singleKmer
631 fprintf ( stderr
, "Time spent on hashing reads: %ds, %lld read(s) processed.\n", ( int ) ( stop_t
- start_t
), i
);
633 //record insert size info
637 { fprintf ( stderr
, "%d pe insert size, the largest boundary is %lld.\n\n", gradsCounter
, pes
[gradsCounter
- 1].PE_bound
); }
640 fprintf ( stderr
, "No paired reads found.\n" );
643 sprintf ( name
, "%s.peGrads", outfile
);
644 fo
= ckopen ( name
, "w" );
645 fprintf ( fo
, "grads&num: %d\t%lld\n", gradsCounter
, n_solexa
);
647 for ( i
= 0; i
< gradsCounter
; i
++ )
649 fprintf ( fo
, "%d\t%lld\t%d\n", pes
[i
].insertS
, pes
[i
].PE_bound
, pes
[i
].rank
);
660 unsigned long long alloCounter
= 0;
661 unsigned long long allKmerCounter
= 0;
663 for ( i
= 0; i
< thrd_num
; i
++ )
665 alloCounter
+= count_kmerset ( ( KmerSets
[i
] ) );
666 allKmerCounter
+= kmerCounter
[i
+ 1];
667 free ( ( void * ) rcSeq
[i
+ 1] );
670 fprintf ( stderr
, "%lli node(s) allocated, %lli kmer(s) in reads, %lli kmer(s) processed.\n", alloCounter
, kmerCounter
[0], allKmerCounter
);
673 free ( ( void * ) rcSeq
);
674 free ( ( void * ) kmerCounter
);
676 for ( i
= 0; i
< maxReadNum
; i
++ )
678 free ( ( void * ) seqBuffer
[i
] );
681 free ( ( void * ) seqBuffer
);
682 free ( ( void * ) lenBuffer
);
683 free ( ( void * ) indexArray
);
684 free ( ( void * ) kmerBuffer
);
685 free ( ( void * ) hashBanBuffer
);
686 free ( ( void * ) nextcBuffer
);
687 free ( ( void * ) prevcBuffer
);
688 free ( ( void * ) next_name
);
689 free ( ( void * ) aioBuffer1
);
690 free ( ( void * ) aioBuffer2
);
691 free ( ( void * ) readBuffer1
);
692 free ( ( void * ) readBuffer2
);
693 free ( ( void * ) cach1
);
694 free ( ( void * ) cach2
);
695 fprintf ( stderr
, "done hashing nodes\n" );
700 deLowCov ( thrdSignal
);
702 fprintf ( stderr
, "Time spent on delowcvgNode: %ds.\n", ( int ) ( stop_t
- start_t
) );
706 Mark1in1outNode ( thrdSignal
);
707 freqStat ( outfile
);
709 fprintf ( stderr
, "Time spent on marking linear nodes: %ds.\n", ( int ) ( stop_t
- start_t
) );
710 sendWorkSignal ( 3, thrdSignal
); //exit
711 thread_wait ( threads
);
715 void initAIO ( struct aiocb
* aio
, char * buf
, int fd
, int size
)
717 bzero ( aio
, sizeof ( struct aiocb
) );
718 aio
->aio_buf
= ( void * ) buf
;
719 aio
->aio_fildes
= fd
;
720 aio
->aio_nbytes
= size
;
724 int AIORead ( struct aiocb
* mycb
, int * offset
, char * buf
, char * cach
, int * rt
, int curr_type
)
728 size_t mode
, get
, max_list
;
730 // rt = aio_read(mycb);
733 struct aiocb
* aiocb_list
[1];
734 aiocb_list
[0] = mycb
;
739 mode
= aio_suspend ( ( const struct aiocb
* const * ) aiocb_list
, max_list
, NULL
);
743 if ( errno
!= EAGAIN
&& errno
!= EINTR
)
745 fprintf ( stderr
, "Error:%s.\n", errno
);
753 //while(aio_error(mycb) == EINPROGRESS);
754 get
= aio_return ( mycb
);
759 char * temp
= ( char * ) ( ( *mycb
).aio_buf
);
761 if ( ( get
% 32768 ) != 0 )
763 strcpy ( buf
, cach
);
764 memcpy ( &buf
[j
], temp
, get
);
765 memset ( cach
, '\0', j
);
771 if ( ( curr_type
== 2 ) || ( curr_type
== 6 ) )
775 for ( i
= get
- 1; ( temp
[i
] != '@' ) || ( temp
[i
- 1] != '\n' ); i
-- )
777 if ( temp
[i
] == '\n' ) {num
++;}
782 for ( i2
= i
- 2; temp
[i2
] != '\n'; i2
-- ) { ; }
784 if ( temp
[i2
+ 1] == '+' )
786 for ( i2
= i2
- 1; temp
[i2
] != '\n'; i2
-- ) { ; }
788 if ( temp
[i2
+ 1] != '+' ) {for ( i
= i2
- 1; ( temp
[i
] != '@' ) || ( temp
[i
- 1] != '\n' ); i
-- ) { ; }}
792 else if ( ( curr_type
== 1 ) || ( curr_type
== 3 ) || ( curr_type
== 5 ) )
793 for ( i
= get
- 1; temp
[i
] != '>'; i
-- ) { ; }
795 //for (i = get - 1; temp[i] != '>' && temp[i] != '@'; i--) ;
796 strcpy ( buf
, cach
);
797 memcpy ( &buf
[j
], temp
, i
);
800 memset ( cach
, '\0', j
);
801 memcpy ( cach
, &temp
[i
], get
- i
);
802 ( *mycb
).aio_offset
+= get
;
803 *rt
= aio_read ( mycb
);
808 fprintf(stderr
, "Error: aio_return error.\n");
812 char *temp = (char *)((*mycb).aio_buf);
814 strcpy(&buf[j],temp);
823 fprintf(stderr
, "Error: (*rt != 0) in AIORead.\n");
829 boolean
openNextFile ( int * libNo
, boolean pairs
, unsigned char asm_ctg
)
834 if ( lib_array
[i
].fp1
)
835 { closeFp1InLab ( i
); }
837 if ( lib_array
[i
].fp2
)
838 { closeFp2InLab ( i
); }
840 *libNo
= nextValidIndex ( i
, pairs
, asm_ctg
);
843 if ( lib_array
[i
].rd_len_cutoff
> 0 )
844 { maxReadLen
= lib_array
[i
].rd_len_cutoff
< maxReadLen4all
? lib_array
[i
].rd_len_cutoff
: maxReadLen4all
; }
846 { maxReadLen
= maxReadLen4all
; }
848 //record insert size info
849 //printf("from lib %d to %d, read %lld to %ld\n",prevLib,i,readNumBack,n_solexa);
850 if ( pairs
&& i
!= prevLib
)
852 if ( readNumBack
< n_solexa
)
854 pes
[gradsCounter
].PE_bound
= n_solexa
;
855 pes
[gradsCounter
].rank
= lib_array
[prevLib
].rank
;
856 pes
[gradsCounter
].pair_num_cut
= lib_array
[prevLib
].pair_num_cut
;
857 pes
[gradsCounter
++].insertS
= lib_array
[prevLib
].avg_ins
;
858 readNumBack
= n_solexa
;
869 static void thread_delow ( KmerSet
* set
, unsigned char thrdID
)
871 int i
, in_num
, out_num
, cvgSingle
;
876 while ( set
->iter_ptr
< set
->size
)
878 if ( !is_kmer_entity_null ( set
->flags
, set
->iter_ptr
) )
880 in_num
= out_num
= l_cvg
= r_cvg
= 0;
881 rs
= set
->array
+ set
->iter_ptr
;
883 for ( i
= 0; i
< 4; i
++ )
885 cvgSingle
= get_kmer_left_cov ( *rs
, i
);
887 if ( cvgSingle
> 0 && cvgSingle
<= deLowKmer
)
889 set_kmer_left_cov ( *rs
, i
, 0 );
892 cvgSingle
= get_kmer_right_cov ( *rs
, i
);
894 if ( cvgSingle
> 0 && cvgSingle
<= deLowKmer
)
896 set_kmer_right_cov ( *rs
, i
, 0 );
900 if ( rs
->l_links
== 0 && rs
->r_links
== 0 )
910 //printf("%lld single nodes, %lld linear\n",counter,tips[thrdID]);
913 static void deLowCov ( unsigned char * thrdSignal
)
916 long long counter
= 0;
917 tips
= ( long long * ) ckalloc ( thrd_num
* sizeof ( long long ) );
919 for ( i
= 0; i
< thrd_num
; i
++ )
924 sendWorkSignal ( 5, thrdSignal
); //mark linear nodes
926 for ( i
= 0; i
< thrd_num
; i
++ )
931 free ( ( void * ) tips
);
932 fprintf ( stderr
, "%lld kmer(s) removed.\n", counter
);
935 static void thread_mark ( KmerSet
* set
, unsigned char thrdID
)
937 int i
, in_num
, out_num
, cvgSingle
;
940 long long counter
= 0;
943 while ( set
->iter_ptr
< set
->size
)
945 if ( !is_kmer_entity_null ( set
->flags
, set
->iter_ptr
) )
947 in_num
= out_num
= l_cvg
= r_cvg
= 0;
948 rs
= set
->array
+ set
->iter_ptr
;
950 for ( i
= 0; i
< 4; i
++ )
952 cvgSingle
= get_kmer_left_cov ( *rs
, i
);
960 cvgSingle
= get_kmer_right_cov ( *rs
, i
);
971 kmerFreq
[thrdID
][1]++;
976 kmerFreq
[thrdID
][ ( l_cvg
> r_cvg
? l_cvg
: r_cvg
)]++;
979 if ( in_num
== 1 && out_num
== 1 )
989 //printf("%lld single nodes, %lld linear\n",counter,tips[thrdID]);
992 static void Mark1in1outNode ( unsigned char * thrdSignal
)
995 long long counter
= 0;
996 tips
= ( long long * ) ckalloc ( thrd_num
* sizeof ( long long ) );
997 kmerFreq
= ( long long ** ) ckalloc ( thrd_num
* sizeof ( long long * ) );
999 for ( i
= 0; i
< thrd_num
; i
++ )
1001 kmerFreq
[i
] = ( long long * ) ckalloc ( 257 * sizeof ( long long ) );
1002 memset ( kmerFreq
[i
], 0, 257 * sizeof ( long long ) );
1006 sendWorkSignal ( 4, thrdSignal
); //mark linear nodes
1008 for ( i
= 0; i
< thrd_num
; i
++ )
1013 free ( ( void * ) tips
);
1014 fprintf ( stderr
, "%lld linear node(s) marked.\n", counter
);
1017 static void freqStat ( char * outfile
)
1023 sprintf ( name
, "%s.kmerFreq", outfile
);
1024 fo
= ckopen ( name
, "w" );
1026 for ( i
= 1; i
< 256; i
++ )
1030 for ( j
= 0; j
< thrd_num
; j
++ )
1032 sum
+= kmerFreq
[j
][i
];
1035 fprintf ( fo
, "%lld\n", sum
);
1038 for ( i
= 0; i
< thrd_num
; i
++ )
1040 free ( ( void * ) kmerFreq
[i
] );
1043 free ( ( void * ) kmerFreq
);