4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
29 #define preARCBLOCKSIZE 100000
31 static const Kmer kmerZero
= { 0, 0, 0, 0 };
33 static const Kmer kmerZero
= { 0, 0 };
36 static unsigned int * arcCounters
;
37 static int buffer_size
= 100000000;
38 static long long markCounter
= 0;
39 static unsigned int * fwriteBuf
;
40 static unsigned char * markerOnEdge
; //edge occured times for each edge
42 //buffer related varibles for chop kmer
45 static char ** seqBuffer
;
46 static int * lenBuffer
;
48 //edge and (K+1)mer related variables
49 static preARC
** preArc_array
;
50 static Kmer
* mixBuffer
; //kmer buffer; after searching, mixBuffer[j].low = node->l_links; 'node->l_links'
51 static boolean
* flagArray
; //indicate each item in mixBuffer whether it's a (K+1)mer
53 // kmer related variables
56 static Kmer
* kmerBuffer
;
57 static ubyte8
* hashBanBuffer
;
58 static kmer_t
** nodeBuffer
; //kmer_t buffer related to 'kmerBuffer'
59 static boolean
* smallerBuffer
;
60 static int * indexArray
;
62 static int * deletion
; //read deletion number for each thread
64 static struct aiocb aio1
;
65 static struct aiocb aio2
;
66 static char * aioBuffer1
;
67 static char * aioBuffer2
;
68 static char * readBuffer1
;
69 static char * readBuffer2
;
71 static void parse1read ( int t
, int threadID
);
72 static void search1kmerPlus ( int j
, unsigned char thrdID
);
73 static void threadRoutine ( void * thrdID
);
74 static void searchKmer ( int t
, KmerSet
* kset
);
75 static void chopKmer4read ( int t
, int threadID
);
76 static void thread_wait ( pthread_t
* threads
);
77 static void thread_add1preArc ( unsigned int from_ed
, unsigned int to_ed
, unsigned int thrdID
);
79 static void creatThrds ( pthread_t
* threads
, PARAMETER
* paras
)
84 for ( i
= 0; i
< thrd_num
; i
++ )
86 //printf("to create %dth thread\n",(*(char *)&(threadID[i])));
87 if ( ( temp
= pthread_create ( &threads
[i
], NULL
, ( void * ) threadRoutine
, & ( paras
[i
] ) ) ) != 0 )
89 fprintf ( stderr
, "Create threads failed.\n" );
94 fprintf ( stderr
, "%d thread(s) initialized.\n", thrd_num
);
97 static void threadRoutine ( void * para
)
100 int i
, t
, j
, start
, finish
;
102 prm
= ( PARAMETER
* ) para
;
105 //printf("%dth thread with task %d, hash_table %p\n",id,prm.task,prm.hash_table);
108 if ( * ( prm
->selfSignal
) == 1 )
110 for ( i
= 0; i
< kmer_c
; i
++ )
112 //if((hashBanBuffer[i]&taskMask)!=prm.threadID)
113 if ( ( hashBanBuffer
[i
] % thrd_num
) != id
)
118 searchKmer ( i
, KmerSets
[id
] );
121 * ( prm
->selfSignal
) = 0;
123 else if ( * ( prm
->selfSignal
) == 2 )
125 for ( i
= 0; i
< read_c
; i
++ )
127 if ( i
% thrd_num
!= id
)
132 chopKmer4read ( i
, id
+ 1 );
135 * ( prm
->selfSignal
) = 0;
137 else if ( * ( prm
->selfSignal
) == 3 )
140 for ( t
= 0; t
< read_c
; t
++ )
142 if ( t
% thrd_num
!= id
)
147 parse1read ( t
, id
+ 1 );
150 * ( prm
->selfSignal
) = 0;
152 else if ( * ( prm
->selfSignal
) == 4 )
154 //printf("thread %d, reads %d splay kmerplus\n",id,read_c);
155 for ( t
= 0; t
< read_c
; t
++ )
157 start
= indexArray
[t
];
158 finish
= indexArray
[t
+ 1];
161 for ( j
= start
; j
< finish
; j
++ )
163 if ( flagArray
[j
] == 0 )
165 if ( mixBuffer
[j
].low2
== 0 )
170 else if ( hashBanBuffer
[j
] % thrd_num
== id
)
172 //fprintf(stderr,"thread %d search for ban %lld\n",id,hashBanBuffer[j]);
173 search1kmerPlus ( j
, id
);
177 if(flagArray[j]==0&&mixBuffer[j]==0)
179 if(!flagArray[j]||(hashBanBuffer[j]%thrd_num)!=id)
181 search1kmerPlus(j,id);
187 for ( j
= start
; j
< finish
; j
++ )
189 if ( flagArray
[j
] == 0 )
191 if ( mixBuffer
[j
].low
== 0 )
196 else if ( hashBanBuffer
[j
] % thrd_num
== id
)
198 //fprintf(stderr,"thread %d search for ban %lld\n",id,hashBanBuffer[j]);
199 search1kmerPlus ( j
, id
);
203 if(flagArray[j]==0&&mixBuffer[j]==0)
205 if(!flagArray[j]||(hashBanBuffer[j]%thrd_num)!=id)
207 search1kmerPlus(j,id);
214 * ( prm
->selfSignal
) = 0;
216 else if ( * ( prm
->selfSignal
) == 6 )
218 for ( t
= 0; t
< read_c
; t
++ )
220 start
= indexArray
[t
];
221 finish
= indexArray
[t
+ 1];
224 for ( j
= start
; j
< finish
- 1; j
++ )
226 if ( mixBuffer
[j
].low2
== 0 || mixBuffer
[j
+ 1].low2
== 0 )
231 if ( mixBuffer
[j
].low2
% thrd_num
!= id
)
236 thread_add1preArc ( mixBuffer
[j
].low2
, mixBuffer
[j
+ 1].low2
, id
);
241 for ( j
= start
; j
< finish
- 1; j
++ )
243 if ( mixBuffer
[j
].low
== 0 || mixBuffer
[j
+ 1].low
== 0 )
248 if ( mixBuffer
[j
].low
% thrd_num
!= id
)
253 thread_add1preArc ( mixBuffer
[j
].low
, mixBuffer
[j
+ 1].low
, id
);
259 * ( prm
->selfSignal
) = 0;
261 else if ( * ( prm
->selfSignal
) == 5 )
263 * ( prm
->selfSignal
) = 0;
271 static void chopKmer4read ( int t
, int threadID
)
273 char * src_seq
= seqBuffer
[t
];
274 char * bal_seq
= rcSeq
[threadID
];
275 int len_seq
= lenBuffer
[t
];
277 ubyte8 hash_ban
, bal_hash_ban
;
283 for ( index
= 0; index
< overlaplen
; index
++ )
285 word
= KmerLeftBitMoveBy2 ( word
);
286 word
.low2
|= src_seq
[index
];
292 for ( index
= 0; index
< overlaplen
; index
++ )
294 word
= KmerLeftBitMoveBy2 ( word
);
295 word
.low
|= src_seq
[index
];
299 reverseComplementSeq ( src_seq
, len_seq
, bal_seq
);
300 // complementary node
301 bal_word
= reverseComplement ( word
, overlaplen
);
302 bal_j
= len_seq
- 0 - overlaplen
; // 0;
303 index
= indexArray
[t
];
305 if ( KmerSmaller ( word
, bal_word
) )
307 hash_ban
= hash_kmer ( word
);
308 kmerBuffer
[index
] = word
;
309 smallerBuffer
[index
] = 1;
310 hashBanBuffer
[index
++] = hash_ban
;
314 bal_hash_ban
= hash_kmer ( bal_word
);
315 kmerBuffer
[index
] = bal_word
;
316 smallerBuffer
[index
] = 0;
317 hashBanBuffer
[index
++] = bal_hash_ban
;
320 //printf("%dth: %p with %p\n",kmer_c-1,bal_word,bal_hash_ban);
321 for ( j
= 1; j
<= len_seq
- overlaplen
; j
++ )
323 word
= nextKmer ( word
, src_seq
[j
- 1 + overlaplen
] );
324 bal_j
= len_seq
- j
- overlaplen
; // j;
325 bal_word
= prevKmer ( bal_word
, bal_seq
[bal_j
] );
327 if ( KmerSmaller ( word
, bal_word
) )
329 hash_ban
= hash_kmer ( word
);
330 kmerBuffer
[index
] = word
;
331 smallerBuffer
[index
] = 1;
332 hashBanBuffer
[index
++] = hash_ban
;
333 //printf("%dth: %p with %p\n",kmer_c-1,word,hashBanBuffer[kmer_c-1]);
337 // complementary node
338 bal_hash_ban
= hash_kmer ( bal_word
);
339 kmerBuffer
[index
] = bal_word
;
340 smallerBuffer
[index
] = 0;
341 hashBanBuffer
[index
++] = bal_hash_ban
;
342 //printf("%dth: %p with %p\n",kmer_c-1,bal_word,hashBanBuffer[kmer_c-1]);
347 //splay for one kmer in buffer and save the node to nodeBuffer
348 static void searchKmer ( int t
, KmerSet
* kset
)
351 boolean found
= search_kmerset ( kset
, kmerBuffer
[t
], &node
);
355 fprintf ( stderr
, "SearchKmer: kmer " );
356 PrintKmer ( stderr
, kmerBuffer
[t
] );
357 fprintf ( stderr
, " is not found.\n" );
360 fprintf (stderr,"searchKmer: kmer %llx %llx %llx %llx is not found\n", kmerBuffer[t].high1, kmerBuffer[t].low1, kmerBuffer[t].high2, kmerBuffer[t].low2);
362 fprintf (stderr,"searchKmer: kmer %llx %llx is not found\n", kmerBuffer[t].high, kmerBuffer[t].low);
367 nodeBuffer
[t
] = node
;
370 static preARC
* getPreArcBetween ( unsigned int from_ed
, unsigned int to_ed
)
373 parc
= preArc_array
[from_ed
];
377 if ( parc
->to_ed
== to_ed
)
388 static void thread_add1preArc ( unsigned int from_ed
, unsigned int to_ed
, unsigned int thrdID
)
390 preARC
* parc
= getPreArcBetween ( from_ed
, to_ed
);
394 parc
->multiplicity
++;
398 parc
= prlAllocatePreArc ( to_ed
, preArc_mem_managers
[thrdID
] );
399 arcCounters
[thrdID
]++;
400 parc
->next
= preArc_array
[from_ed
];
401 preArc_array
[from_ed
] = parc
;
405 static void memoAlloc4preArc ()
408 preArc_array
= ( preARC
** ) ckalloc ( ( num_ed
+ 1 ) * sizeof ( preARC
* ) );
410 for ( i
= 0; i
<= num_ed
; i
++ )
412 preArc_array
[i
] = NULL
;
416 static void memoFree4preArc ()
418 prlDestroyPreArcMem ();
422 free ( ( void * ) preArc_array
);
426 static void output_arcs ( char * outfile
)
430 FILE * outfp
, *outfp2
= NULL
;
432 sprintf ( name
, "%s.preArc", outfile
);
433 outfp
= ckopen ( name
, "w" );
437 sprintf ( name
, "%s.markOnEdge", outfile
);
438 outfp2
= ckopen ( name
, "w" );
443 for ( i
= 1; i
<= num_ed
; i
++ )
447 markCounter
+= markerOnEdge
[i
];
448 fprintf ( outfp2
, "%d\n", markerOnEdge
[i
] );
451 parc
= preArc_array
[i
];
458 fprintf ( outfp
, "%u", i
);
462 fprintf ( outfp
, " %u %u", parc
->to_ed
, parc
->multiplicity
);
466 fprintf ( outfp
, "\n" );
474 // fprintf (stderr,"%lld marker(s) counted.\n", markCounter);
478 static void recordPathBin ( FILE * outfp
)
480 int t
, j
, start
, finish
;
481 unsigned char counter
;
483 for ( t
= 0; t
< read_c
; t
++ )
485 start
= indexArray
[t
];
486 finish
= indexArray
[t
+ 1];
489 if ( finish
- start
< 3 || mixBuffer
[start
].low2
== 0 || mixBuffer
[start
+ 1].low2
== 0 || mixBuffer
[start
+ 2].low2
== 0 )
496 for ( j
= start
; j
< finish
; j
++ )
498 if ( mixBuffer
[j
].low2
== 0 )
503 fwriteBuf
[counter
++] = ( unsigned int ) mixBuffer
[j
].low2
;
505 if ( markerOnEdge
[mixBuffer
[j
].low2
] < 255 )
507 markerOnEdge
[mixBuffer
[j
].low2
]++;
515 if ( finish
- start
< 3 || mixBuffer
[start
].low
== 0 || mixBuffer
[start
+ 1].low
== 0 || mixBuffer
[start
+ 2].low
== 0 )
522 for ( j
= start
; j
< finish
; j
++ )
524 if ( mixBuffer
[j
].low
== 0 )
529 fwriteBuf
[counter
++] = ( unsigned int ) mixBuffer
[j
].low
;
531 if ( markerOnEdge
[mixBuffer
[j
].low
] < 255 )
533 markerOnEdge
[mixBuffer
[j
].low
]++;
540 fwrite ( &counter
, sizeof ( char ), 1, outfp
);
541 fwrite ( fwriteBuf
, sizeof ( unsigned int ), ( int ) counter
, outfp
);
545 /*************************************************
549 Searchs (k+1) kmer in hashset.
551 1. j: (k+1) mer's index
557 *************************************************/
558 static void search1kmerPlus ( int j
, unsigned char thrdID
)
561 boolean found
= search_kmerset ( KmerSetsPatch
[thrdID
], mixBuffer
[j
], &node
);
566 fprintf(stderr,"kmerPlus %llx %llx (hashban %lld) not found, flag %d!\n",
567 mixBuffer[j].high,mixBuffer[j].low,hashBanBuffer[j],flagArray[j]);
569 mixBuffer
[j
] = kmerZero
;
571 } //else fprintf(stderr,"kmerPlus found\n");
575 if ( smallerBuffer
[j
] )
577 mixBuffer
[j
].low2
= node
->l_links
;
581 mixBuffer
[j
].low2
= node
->l_links
+ node
->twin
- 1;
586 if ( smallerBuffer
[j
] )
588 mixBuffer
[j
].low
= node
->l_links
;
592 mixBuffer
[j
].low
= node
->l_links
+ node
->twin
- 1;
598 static void parse1read ( int t
, int threadID
)
600 unsigned int j
, retain
= 0;
601 unsigned int edge_index
= 0;
604 Kmer wordplus
, bal_wordplus
;
605 unsigned int start
, finish
, pos
;
606 Kmer prevKmer
, currentKmer
;
607 boolean IsPrevKmer
= 0;
608 start
= indexArray
[t
];
609 finish
= indexArray
[t
+ 1];
612 for ( j
= start
; j
< finish
; j
++ )
614 node
= nodeBuffer
[j
];
616 //extract edges or keep kmers
617 if ( ( node
->deleted
) || ( node
->linear
&& !node
->inEdge
) ) // deleted or in a floating loop
632 isSmaller
= smallerBuffer
[j
];
638 edge_index
= node
->l_links
;
642 edge_index
= node
->l_links
+ node
->twin
- 1;
647 if ( retain
== 0 || IsPrevKmer
)
650 mixBuffer
[pos
].low2
= edge_index
;
651 flagArray
[pos
++] = 0;
654 else if ( edge_index
!= mixBuffer
[pos
- 1].low2
)
657 mixBuffer
[pos
].low2
= edge_index
;
658 flagArray
[pos
++] = 0;
663 if ( retain
== 0 || IsPrevKmer
)
666 mixBuffer
[pos
].low
= edge_index
;
667 flagArray
[pos
++] = 0;
670 else if ( edge_index
!= mixBuffer
[pos
- 1].low
)
673 mixBuffer
[pos
].low
= edge_index
;
674 flagArray
[pos
++] = 0;
683 currentKmer
= node
->seq
;
687 currentKmer
= reverseComplement ( node
->seq
, overlaplen
);
693 wordplus
= KmerPlus ( prevKmer
, lastCharInKmer ( currentKmer
) );
694 bal_wordplus
= reverseComplement ( wordplus
, overlaplen
+ 1 );
696 if ( KmerSmaller ( wordplus
, bal_wordplus
) )
698 smallerBuffer
[pos
] = 1;
699 hashBanBuffer
[pos
] = hash_kmer ( wordplus
);
700 mixBuffer
[pos
] = wordplus
;
704 smallerBuffer
[pos
] = 0;
705 hashBanBuffer
[pos
] = hash_kmer ( bal_wordplus
);
706 mixBuffer
[pos
] = bal_wordplus
;
709 // fprintf(stderr,"%lld\n",hashBanBuffer[pos]);
710 flagArray
[pos
++] = 1;
714 prevKmer
= currentKmer
;
719 for(j=start;j<pos;j++)
720 fprintf(stderr,"%d ",flagArray[j]);
721 fprintf(stderr,"\n");
725 deletion
[threadID
]++;
730 flagArray
[start
] = 0;
731 mixBuffer
[start
] = kmerZero
;
735 if ( ( pos
- start
) != retain
)
737 fprintf ( stderr
, "Read %d, %d vs %d.\n", t
, retain
, edge_index
- start
);
743 mixBuffer
[pos
] = kmerZero
;
747 static void sendWorkSignal ( unsigned char SIG
, unsigned char * thrdSignals
)
751 for ( t
= 0; t
< thrd_num
; t
++ )
753 thrdSignals
[t
+ 1] = SIG
;
760 for ( t
= 0; t
< thrd_num
; t
++ )
761 if ( thrdSignals
[t
+ 1] )
773 /*************************************************
777 Maps the reads to edges and builds pre-arcs between edges.
779 1. libfile: the reads config file
780 2. outfile: the output file prefix
785 *************************************************/
786 void prlRead2edge ( char * libfile
, char * outfile
)
790 unsigned char asm_ctg
= 1;
792 char name
[256], *src_name
, *next_name
;
794 int maxReadNum
, libNo
;
795 boolean flag
, pairs
= 0;
796 pthread_t threads
[thrd_num
];
797 unsigned char thrdSignal
[thrd_num
+ 1];
798 PARAMETER paras
[thrd_num
];
802 scan_libInfo ( libfile
);
803 alloc_pe_mem ( num_libs
);
810 maxReadLen4all
= maxReadLen
;
811 fprintf ( stderr
, "In file: %s, max seq len %d, max name len %d.\n", libfile
, maxReadLen
, maxNameLen
);
815 sprintf ( name
, "%s.path", outfile
);
816 outfp
= ckopen ( name
, "wb" );
819 src_name
= ( char * ) ckalloc ( ( maxNameLen
+ 1 ) * sizeof ( char ) );
820 next_name
= ( char * ) ckalloc ( ( maxNameLen
+ 1 ) * sizeof ( char ) );
821 kmerBuffer
= ( Kmer
* ) ckalloc ( buffer_size
* sizeof ( Kmer
) );
822 mixBuffer
= ( Kmer
* ) ckalloc ( buffer_size
* sizeof ( Kmer
) );
823 hashBanBuffer
= ( ubyte8
* ) ckalloc ( buffer_size
* sizeof ( ubyte8
) );
824 nodeBuffer
= ( kmer_t
** ) ckalloc ( buffer_size
* sizeof ( kmer_t
* ) );
825 smallerBuffer
= ( boolean
* ) ckalloc ( buffer_size
* sizeof ( boolean
) );
826 flagArray
= ( boolean
* ) ckalloc ( buffer_size
* sizeof ( boolean
) );
827 maxReadNum
= buffer_size
/ ( maxReadLen
- overlaplen
+ 1 );
828 //printf("buffer for at most %d reads\n",maxReadNum);
829 int maxAIOSize
= 32768;
830 aioBuffer1
= ( char * ) ckalloc ( ( maxAIOSize
) * sizeof ( char ) );
831 aioBuffer2
= ( char * ) ckalloc ( ( maxAIOSize
) * sizeof ( char ) );
832 readBuffer1
= ( char * ) ckalloc ( ( maxAIOSize
+ ( maxReadLen
* 4 + 1024 ) ) * sizeof ( char ) ); //(char *)ckalloc(maxAIOSize*sizeof(char)); //1024
833 readBuffer2
= ( char * ) ckalloc ( ( maxAIOSize
+ ( maxReadLen
* 4 + 1024 ) ) * sizeof ( char ) ); //1024
834 cach1
= ( char * ) ckalloc ( ( maxReadLen
* 4 + 1024 ) * sizeof ( char ) ); //1024
835 cach2
= ( char * ) ckalloc ( ( maxReadLen
* 4 + 1024 ) * sizeof ( char ) ); //1024
836 memset ( cach1
, '\0', ( maxReadLen
* 4 + 1024 ) ); //1024
837 memset ( cach2
, '\0', ( maxReadLen
* 4 + 1024 ) ); //1024
838 seqBuffer
= ( char ** ) ckalloc ( maxReadNum
* sizeof ( char * ) );
839 lenBuffer
= ( int * ) ckalloc ( maxReadNum
* sizeof ( int ) );
840 indexArray
= ( int * ) ckalloc ( ( maxReadNum
+ 1 ) * sizeof ( int ) );
842 for ( i
= 0; i
< maxReadNum
; i
++ )
844 seqBuffer
[i
] = ( char * ) ckalloc ( maxReadLen
* sizeof ( char ) );
848 flags
= ( char ** ) ckalloc ( ( thrd_num
+ 1 ) * sizeof ( char * ) );
849 deletion
= ( int * ) ckalloc ( ( thrd_num
+ 1 ) * sizeof ( int ) );
850 rcSeq
= ( char ** ) ckalloc ( ( thrd_num
+ 1 ) * sizeof ( char * ) );
854 markerOnEdge
= ( unsigned char * ) ckalloc ( ( num_ed
+ 1 ) * sizeof ( unsigned char ) );
856 for ( i
= 1; i
<= num_ed
; i
++ )
861 fwriteBuf
= ( unsigned int * ) ckalloc ( ( maxReadLen
- overlaplen
+ 1 ) * sizeof ( unsigned int ) );
868 preArc_mem_managers
= ( MEM_MANAGER
** ) ckalloc ( thrd_num
* sizeof ( MEM_MANAGER
* ) );
869 arcCounters
= ( unsigned int * ) ckalloc ( thrd_num
* sizeof ( unsigned int ) );
871 for ( i
= 0; i
< thrd_num
; i
++ )
874 preArc_mem_managers
[i
] = createMem_manager ( preARCBLOCKSIZE
, sizeof ( preARC
) );
876 flags
[i
+ 1] = ( char * ) ckalloc ( 2 * maxReadLen
* sizeof ( char ) );
877 rcSeq
[i
+ 1] = ( char * ) ckalloc ( maxReadLen
* sizeof ( char ) );
878 thrdSignal
[i
+ 1] = 0;
879 paras
[i
].threadID
= i
;
880 paras
[i
].mainSignal
= &thrdSignal
[0];
881 paras
[i
].selfSignal
= &thrdSignal
[i
+ 1];
884 creatThrds ( threads
, paras
);
890 flags
[0] = ( char * ) ckalloc ( 2 * maxReadLen
* sizeof ( char ) );
891 rcSeq
[0] = ( char * ) ckalloc ( maxReadLen
* sizeof ( char ) );
894 kmer_c
= n_solexa
= read_c
= i
= libNo
= readNumBack
= gradsCounter
= 0;
895 int t0
, t1
, t2
, t3
, t4
, t5
, t6
;
896 t0
= t1
= t2
= t3
= t4
= t5
= t6
= 0;
897 time_t read_start
, read_end
, time_bef
, time_aft
;
898 time ( &read_start
);
900 while ( openNextFile ( &libNo
, pairs
, asm_ctg
) )
902 if ( lib_array
[libNo
].curr_type
== 4 )
904 int type
= 0; //deside the PE reads is good or bad
906 while ( ( flag
= read1seqInLibBam ( seqBuffer
[read_c
], next_name
, & ( lenBuffer
[read_c
] ), &libNo
, pairs
, 1, &type
) ) != 0 )
908 if ( type
== -1 ) //if the reads is bad, go back.
912 if ( lenBuffer
[read_c
- 1] >= overlaplen
+ 1 )
914 kmer_c
-= lenBuffer
[read_c
- 1] - overlaplen
+ 1;
922 if ( ( ++i
) % 100000000 == 0 )
924 fprintf ( stderr
, "--- %lldth reads.\n", i
);
927 if ( lenBuffer
[read_c
] < overlaplen
+ 1 )
932 //if(lenBuffer[read_c]>70)
933 // lenBuffer[read_c] = 70;
934 //else if(lenBuffer[read_c]>40)
935 // lenBuffer[read_c] = 40;
936 indexArray
[read_c
] = kmer_c
;
937 kmer_c
+= lenBuffer
[read_c
] - overlaplen
+ 1;
940 if ( read_c
== maxReadNum
)
942 indexArray
[read_c
] = kmer_c
;
944 t0
+= read_end
- read_start
;
946 sendWorkSignal ( 2, thrdSignal
); //chopKmer4read
948 t1
+= time_aft
- time_bef
;
950 sendWorkSignal ( 1, thrdSignal
); //searchKmer
952 t2
+= time_aft
- time_bef
;
954 sendWorkSignal ( 3, thrdSignal
); //parse1read
956 t3
+= time_aft
- time_bef
;
958 sendWorkSignal ( 4, thrdSignal
); //search1kmerPlus
960 t4
+= time_aft
- time_bef
;
962 sendWorkSignal ( 6, thrdSignal
); //thread_add1preArc
964 t5
+= time_aft
- time_bef
;
970 recordPathBin ( outfp
);
974 t6
+= time_aft
- time_bef
;
975 //output_path(read_c,edge_no,flags,outfp);
978 time ( &read_start
);
982 else if ( lib_array
[libNo
].curr_type
== 1 || lib_array
[libNo
].curr_type
== 2 )
984 initAIO ( &aio1
, aioBuffer1
, fileno ( lib_array
[libNo
].fp1
), maxAIOSize
);
985 initAIO ( &aio2
, aioBuffer2
, fileno ( lib_array
[libNo
].fp2
), maxAIOSize
);
986 int offset1
, offset2
, flag1
, flag2
, rt1
, rt2
;
987 offset1
= offset2
= 0;
988 rt1
= aio_read ( &aio1
);
989 rt2
= aio_read ( &aio2
);
990 flag1
= AIORead ( &aio1
, &offset1
, readBuffer1
, cach1
, &rt1
, lib_array
[libNo
].curr_type
);
991 flag2
= AIORead ( &aio2
, &offset2
, readBuffer2
, cach2
, &rt2
, lib_array
[libNo
].curr_type
);
993 if ( flag1
&& flag2
)
995 int start1
, start2
, turn
;
999 while ( start1
< offset1
|| start2
< offset2
)
1004 readseqInLib ( seqBuffer
[read_c
], next_name
, & ( lenBuffer
[read_c
] ), readBuffer1
, &start1
, offset1
, libNo
);
1006 if ( ( ++i
) % 100000000 == 0 )
1007 { fprintf ( stderr
, "--- %lldth reads.\n", i
); }
1009 if ( lenBuffer
[read_c
] < overlaplen
+ 1 )
1011 if ( start1
>= offset1
)
1015 flag1
= AIORead ( &aio1
, &offset1
, readBuffer1
, cach1
, &rt1
, lib_array
[libNo
].curr_type
);
1021 indexArray
[read_c
] = kmer_c
;
1022 kmer_c
+= lenBuffer
[read_c
] - overlaplen
+ 1;
1025 if ( start1
>= offset1
)
1029 flag1
= AIORead ( &aio1
, &offset1
, readBuffer1
, cach1
, &rt1
, lib_array
[libNo
].curr_type
);
1032 if ( read_c
== maxReadNum
)
1034 indexArray
[read_c
] = kmer_c
;
1036 t0
+= read_end
- read_start
;
1038 sendWorkSignal ( 2, thrdSignal
); //chopKmer4read
1040 t1
+= time_aft
- time_bef
;
1042 sendWorkSignal ( 1, thrdSignal
); //searchKmer
1044 t2
+= time_aft
- time_bef
;
1046 sendWorkSignal ( 3, thrdSignal
); //parse1read
1048 t3
+= time_aft
- time_bef
;
1050 sendWorkSignal ( 4, thrdSignal
); //search1kmerPlus
1052 t4
+= time_aft
- time_bef
;
1054 sendWorkSignal ( 6, thrdSignal
); //thread_add1preArc
1056 t5
+= time_aft
- time_bef
;
1061 { recordPathBin ( outfp
); }
1064 t6
+= time_aft
- time_bef
;
1065 //output_path(read_c,edge_no,flags,outfp);
1068 time ( &read_start
);
1077 readseqInLib ( seqBuffer
[read_c
], next_name
, & ( lenBuffer
[read_c
] ), readBuffer2
, &start2
, offset2
, libNo
);
1079 if ( ( ++i
) % 100000000 == 0 )
1080 { fprintf ( stderr
, "--- %lldth reads.\n", i
); }
1082 if ( lenBuffer
[read_c
] < overlaplen
+ 1 )
1084 if ( ( flag2
== 2 ) && ( start2
>= offset2
) )
1087 if ( start2
>= offset2
)
1091 flag2
= AIORead ( &aio2
, &offset2
, readBuffer2
, cach2
, &rt2
, lib_array
[libNo
].curr_type
);
1097 indexArray
[read_c
] = kmer_c
;
1098 kmer_c
+= lenBuffer
[read_c
] - overlaplen
+ 1;
1101 if ( ( flag2
== 2 ) && ( start2
>= offset2
) )
1104 if ( start2
>= offset2
)
1108 flag2
= AIORead ( &aio2
, &offset2
, readBuffer2
, cach2
, &rt2
, lib_array
[libNo
].curr_type
);
1111 if ( read_c
== maxReadNum
)
1113 indexArray
[read_c
] = kmer_c
;
1115 t0
+= read_end
- read_start
;
1117 sendWorkSignal ( 2, thrdSignal
); //chopKmer4read
1119 t1
+= time_aft
- time_bef
;
1121 sendWorkSignal ( 1, thrdSignal
); //searchKmer
1123 t2
+= time_aft
- time_bef
;
1125 sendWorkSignal ( 3, thrdSignal
); //parse1read
1127 t3
+= time_aft
- time_bef
;
1129 sendWorkSignal ( 4, thrdSignal
); //search1kmerPlus
1131 t4
+= time_aft
- time_bef
;
1133 sendWorkSignal ( 6, thrdSignal
); //thread_add1preArc
1135 t5
+= time_aft
- time_bef
;
1140 { recordPathBin ( outfp
); }
1143 t6
+= time_aft
- time_bef
;
1144 //output_path(read_c,edge_no,flags,outfp);
1147 time ( &read_start
);
1156 fprintf(stderr
, "Error: aio_read error.\n");
1161 initAIO ( &aio1
, aioBuffer1
, fileno ( lib_array
[libNo
].fp1
), maxAIOSize
);
1162 int offset
, flag1
, rt
;
1164 rt
= aio_read ( &aio1
);
1166 while ( ( flag1
= AIORead ( &aio1
, &offset
, readBuffer1
, cach1
, &rt
, lib_array
[libNo
].curr_type
) ) )
1170 while ( start
< offset
)
1172 readseqInLib ( seqBuffer
[read_c
], next_name
, & ( lenBuffer
[read_c
] ), readBuffer1
, &start
, offset
, libNo
);
1174 if ( ( ++i
) % 100000000 == 0 )
1175 { fprintf ( stderr
, "--- %lldth reads.\n", i
); }
1177 if ( lenBuffer
[read_c
] < overlaplen
+ 1 )
1180 indexArray
[read_c
] = kmer_c
;
1181 kmer_c
+= lenBuffer
[read_c
] - overlaplen
+ 1;
1185 if ( read_c
> maxReadNum
- 1024 )
1187 indexArray
[read_c
] = kmer_c
;
1189 t0
+= read_end
- read_start
;
1191 sendWorkSignal ( 2, thrdSignal
); //chopKmer4read
1193 t1
+= time_aft
- time_bef
;
1195 sendWorkSignal ( 1, thrdSignal
); //searchKmer
1197 t2
+= time_aft
- time_bef
;
1199 sendWorkSignal ( 3, thrdSignal
); //parse1read
1201 t3
+= time_aft
- time_bef
;
1203 sendWorkSignal ( 4, thrdSignal
); //search1kmerPlus
1205 t4
+= time_aft
- time_bef
;
1207 sendWorkSignal ( 6, thrdSignal
); //thread_add1preArc
1209 t5
+= time_aft
- time_bef
;
1214 { recordPathBin ( outfp
); }
1217 t6
+= time_aft
- time_bef
;
1218 //output_path(read_c,edge_no,flags,outfp);
1221 time ( &read_start
);
1230 fprintf ( stderr
, "%lld read(s) processed.\n", i
);
1231 // fprintf (stderr,"Time ReadingReads: %d,chopKmer4read: %d,searchKmer: %d,parse1read: %d,search1kmerPlus: %d,thread_add1preArc: %d,recordPathBin: %d\n", t0, t1, t2, t3, t4, t5, t6);
1232 fprintf ( stderr
, "Time spent on:\n" );
1233 fprintf ( stderr
, " importing reads: %ds,\n", t0
);
1234 fprintf ( stderr
, " chopping reads to kmers: %ds,\n", t1
);
1235 fprintf ( stderr
, " searching kmers: %ds,\n", t2
);
1236 fprintf ( stderr
, " aligning reads to edges: %ds,\n", t3
);
1237 fprintf ( stderr
, " searching (K+1)mers: %ds,\n", t4
);
1238 fprintf ( stderr
, " adding pre-arcs: %ds,\n", t5
);
1239 fprintf ( stderr
, " recording read paths: %ds.\n", t6
);
1243 indexArray
[read_c
] = kmer_c
;
1244 sendWorkSignal ( 2, thrdSignal
); //chopKmer4read
1245 sendWorkSignal ( 1, thrdSignal
); //searchKmer
1246 sendWorkSignal ( 3, thrdSignal
); //parse1read
1247 sendWorkSignal ( 4, thrdSignal
); //search1kmerPlus
1248 sendWorkSignal ( 6, thrdSignal
); //thread_add1preArc
1253 recordPathBin ( outfp
);
1257 fprintf ( stderr
, "%lld marker(s) output.\n", markCounter
);
1258 sendWorkSignal ( 5, thrdSignal
); //over
1259 thread_wait ( threads
);
1260 output_arcs ( outfile
);
1263 if ( 1 ) // multi-threads
1267 for ( i
= 0; i
< thrd_num
; i
++ )
1269 arcCounter
+= arcCounters
[i
];
1270 free ( ( void * ) flags
[i
+ 1] );
1271 deletion
[0] += deletion
[i
+ 1];
1272 free ( ( void * ) rcSeq
[i
+ 1] );
1278 free ( ( void * ) flags
[0] );
1279 free ( ( void * ) rcSeq
[0] );
1282 fprintf ( stderr
, "Reads alignment done, %d read(s) deleted, %lld pre-arc(s) added.\n", deletion
[0], arcCounter
);
1286 free ( ( void * ) markerOnEdge
);
1287 free ( ( void * ) fwriteBuf
);
1290 free ( ( void * ) arcCounters
);
1291 free ( ( void * ) rcSeq
);
1293 for ( i
= 0; i
< maxReadNum
; i
++ )
1295 free ( ( void * ) seqBuffer
[i
] );
1298 free ( ( void * ) seqBuffer
);
1299 free ( ( void * ) lenBuffer
);
1300 free ( ( void * ) indexArray
);
1301 free ( ( void * ) flags
);
1302 free ( ( void * ) deletion
);
1303 free ( ( void * ) kmerBuffer
);
1304 free ( ( void * ) mixBuffer
);
1305 free ( ( void * ) smallerBuffer
);
1306 free ( ( void * ) flagArray
);
1307 free ( ( void * ) hashBanBuffer
);
1308 free ( ( void * ) nodeBuffer
);
1309 free ( ( void * ) src_name
);
1310 free ( ( void * ) next_name
);
1311 free ( ( void * ) aioBuffer1
);
1312 free ( ( void * ) aioBuffer2
);
1313 free ( ( void * ) readBuffer1
);
1314 free ( ( void * ) readBuffer2
);
1315 free ( ( void * ) cach1
);
1316 free ( ( void * ) cach2
);
1318 if ( gLineLen
< maxReadLen
)
1320 free ( ( void * ) gStr
);
1333 static void thread_wait ( pthread_t
* threads
)
1337 for ( i
= 0; i
< thrd_num
; i
++ )
1338 if ( threads
[i
] != 0 )
1340 pthread_join ( threads
[i
], NULL
);