4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
29 static char tabs
[2][1024]; //for splitColumn()
31 /*************************************************
35 Get the max length for long reads (asm_flags=4) in lib.
37 1. num_libs: number of lib
42 *************************************************/
43 int getMaxLongReadLen ( int num_libs
)
49 for ( i
= 0; i
< num_libs
; i
++ )
51 if ( lib_array
[i
].asm_flag
!= 4 )
57 maxLong
= maxLong
< lib_array
[i
].rd_len_cutoff
? lib_array
[i
].rd_len_cutoff
: maxLong
;
66 return maxLong
> 0 ? maxLong
: maxReadLen
;
70 static boolean
splitColumn ( char * line
)
72 int len
= strlen ( line
);
78 if ( line
[i
] >= 32 && line
[i
] <= 126 && line
[i
] != '=' )
82 while ( i
< len
&& line
[i
] >= 32 && line
[i
] <= 126 && line
[i
] != '=' )
84 tabs
[tabs_n
][j
++] = line
[i
];
88 tabs
[tabs_n
][j
] = '\0';
110 static int cmp_lib ( const void * a
, const void * b
)
113 A
= ( LIB_INFO
* ) a
;
114 B
= ( LIB_INFO
* ) b
;
116 if ( A
->avg_ins
> B
->avg_ins
)
120 else if ( A
->avg_ins
== B
->avg_ins
)
130 void scan_libInfo ( char * libfile
)
138 fp
= ckopen ( libfile
, "r" );
141 while ( fgets ( line
, 1024, fp
) )
146 if ( strcmp ( line
, "[LIB]" ) == 0 )
154 flag
= splitColumn ( line
);
161 if ( strcmp ( tabs
[0], "max_rd_len" ) == 0 )
163 maxReadLen
= atoi ( tabs
[1] );
170 fprintf ( stderr
, "Config file error: no [LIB] in file\n" );
174 //count file numbers of each type
175 lib_array
= ( LIB_INFO
* ) ckalloc ( num_libs
* sizeof ( LIB_INFO
) );
176 pe
= ( boolean
* ) ckalloc ( num_libs
* sizeof ( boolean
) );
178 for ( i
= 0; i
< num_libs
; i
++ )
180 lib_array
[i
].asm_flag
= 3;
181 lib_array
[i
].rank
= 0;
182 lib_array
[i
].pair_num_cut
= 0;
183 lib_array
[i
].rd_len_cutoff
= 0;
184 lib_array
[i
].map_len
= 0;
185 lib_array
[i
].num_s_a_file
= 0;
186 lib_array
[i
].num_s_q_file
= 0;
187 lib_array
[i
].num_p_file
= 0;
188 lib_array
[i
].num_a1_file
= 0;
189 lib_array
[i
].num_a2_file
= 0;
190 lib_array
[i
].num_q1_file
= 0;
191 lib_array
[i
].num_q2_file
= 0;
192 lib_array
[i
].num_b_file
= 0; //init
200 while ( fgets ( line
, 1024, fp
) )
205 if ( strcmp ( line
, "[LIB]" ) == 0 )
212 flag
= splitColumn ( line
);
219 if ( strcmp ( tabs
[0], "f1" ) == 0 )
221 lib_array
[i
].num_a1_file
++;
224 else if ( strcmp ( tabs
[0], "q1" ) == 0 )
226 lib_array
[i
].num_q1_file
++;
229 else if ( strcmp ( tabs
[0], "f2" ) == 0 )
231 lib_array
[i
].num_a2_file
++;
234 else if ( strcmp ( tabs
[0], "q2" ) == 0 )
236 lib_array
[i
].num_q2_file
++;
239 else if ( strcmp ( tabs
[0], "f" ) == 0 )
241 lib_array
[i
].num_s_a_file
++;
243 else if ( strcmp ( tabs
[0], "q" ) == 0 )
245 lib_array
[i
].num_s_q_file
++;
247 else if ( strcmp ( tabs
[0], "p" ) == 0 )
249 lib_array
[i
].num_p_file
++;
252 else if ( strcmp ( tabs
[0], "b" ) == 0 ) // the bam file
254 lib_array
[i
].num_b_file
++;
259 //allocate memory for filenames
260 for ( i
= 0; i
< num_libs
; i
++ )
262 if ( lib_array
[i
].num_a2_file
!= lib_array
[i
].num_a1_file
)
264 fprintf ( stderr
, "Config file error: the number of mark \"f1\" is not the same as \"f2\"!\n" );
268 if ( lib_array
[i
].num_q2_file
!= lib_array
[i
].num_q1_file
)
270 fprintf ( stderr
, "Config file error: the number of mark \"q1\" is not the same as \"q2\"!\n" );
274 if ( lib_array
[i
].num_s_a_file
)
276 lib_array
[i
].s_a_fname
= ( char ** ) ckalloc ( lib_array
[i
].num_s_a_file
* sizeof ( char * ) );
278 for ( j
= 0; j
< lib_array
[i
].num_s_a_file
; j
++ )
280 lib_array
[i
].s_a_fname
[j
] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
284 if ( lib_array
[i
].num_s_q_file
)
286 lib_array
[i
].s_q_fname
= ( char ** ) ckalloc ( lib_array
[i
].num_s_q_file
* sizeof ( char * ) );
288 for ( j
= 0; j
< lib_array
[i
].num_s_q_file
; j
++ )
290 lib_array
[i
].s_q_fname
[j
] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
294 if ( lib_array
[i
].num_p_file
)
296 lib_array
[i
].p_fname
= ( char ** ) ckalloc ( lib_array
[i
].num_p_file
* sizeof ( char * ) );
298 for ( j
= 0; j
< lib_array
[i
].num_p_file
; j
++ )
300 lib_array
[i
].p_fname
[j
] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
304 if ( lib_array
[i
].num_a1_file
)
306 lib_array
[i
].a1_fname
= ( char ** ) ckalloc ( lib_array
[i
].num_a1_file
* sizeof ( char * ) );
308 for ( j
= 0; j
< lib_array
[i
].num_a1_file
; j
++ )
310 lib_array
[i
].a1_fname
[j
] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
314 if ( lib_array
[i
].num_a2_file
)
316 lib_array
[i
].a2_fname
= ( char ** ) ckalloc ( lib_array
[i
].num_a2_file
* sizeof ( char * ) );
318 for ( j
= 0; j
< lib_array
[i
].num_a2_file
; j
++ )
320 lib_array
[i
].a2_fname
[j
] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
324 if ( lib_array
[i
].num_q1_file
)
326 lib_array
[i
].q1_fname
= ( char ** ) ckalloc ( lib_array
[i
].num_q1_file
* sizeof ( char * ) );
328 for ( j
= 0; j
< lib_array
[i
].num_q1_file
; j
++ )
330 lib_array
[i
].q1_fname
[j
] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
334 if ( lib_array
[i
].num_q2_file
)
336 lib_array
[i
].q2_fname
= ( char ** ) ckalloc ( lib_array
[i
].num_q2_file
* sizeof ( char * ) );
338 for ( j
= 0; j
< lib_array
[i
].num_q2_file
; j
++ )
340 lib_array
[i
].q2_fname
[j
] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
344 if ( lib_array
[i
].num_b_file
) //allot memory for bam file name
346 lib_array
[i
].b_fname
= ( char ** ) ckalloc ( lib_array
[i
].num_b_file
* sizeof ( char * ) );
348 for ( j
= 0; j
< lib_array
[i
].num_b_file
; j
++ )
349 { lib_array
[i
].b_fname
[j
] = ( char * ) ckalloc ( 1024 * sizeof ( char ) ); }
354 for ( i
= 0; i
< num_libs
; i
++ )
356 lib_array
[i
].curr_type
= 1;
357 lib_array
[i
].curr_index
= 0;
358 lib_array
[i
].fp1
= NULL
;
359 lib_array
[i
].fp2
= NULL
;
360 lib_array
[i
].num_s_a_file
= 0;
361 lib_array
[i
].num_s_q_file
= 0;
362 lib_array
[i
].num_p_file
= 0;
363 lib_array
[i
].num_a1_file
= 0;
364 lib_array
[i
].num_a2_file
= 0;
365 lib_array
[i
].num_q1_file
= 0;
366 lib_array
[i
].num_q2_file
= 0;
367 lib_array
[i
].num_b_file
= 0; //init
368 lib_array
[i
].fp3
= NULL
;
375 while ( fgets ( line
, 1024, fp
) )
380 if ( strcmp ( line
, "[LIB]" ) == 0 )
387 flag
= splitColumn ( line
);
394 if ( strcmp ( tabs
[0], "f1" ) == 0 )
396 index
= lib_array
[i
].num_a1_file
++;
397 strcpy ( lib_array
[i
].a1_fname
[index
], tabs
[1] );
399 else if ( strcmp ( tabs
[0], "q1" ) == 0 )
401 index
= lib_array
[i
].num_q1_file
++;
402 strcpy ( lib_array
[i
].q1_fname
[index
], tabs
[1] );
404 else if ( strcmp ( tabs
[0], "f2" ) == 0 )
406 index
= lib_array
[i
].num_a2_file
++;
407 strcpy ( lib_array
[i
].a2_fname
[index
], tabs
[1] );
409 if ( strcmp ( lib_array
[i
].a2_fname
[index
], lib_array
[i
].a1_fname
[index
] ) == 0 )
411 fprintf ( stderr
, "Config file error: f2 file is the same as f1 file\n" );
412 fprintf ( stderr
, "f1=%s\n", lib_array
[i
].a1_fname
[index
] );
413 fprintf ( stderr
, "f2=%s\n", lib_array
[i
].a2_fname
[index
] );
417 else if ( strcmp ( tabs
[0], "q2" ) == 0 )
419 index
= lib_array
[i
].num_q2_file
++;
420 strcpy ( lib_array
[i
].q2_fname
[index
], tabs
[1] );
422 if ( strcmp ( lib_array
[i
].q2_fname
[index
], lib_array
[i
].q1_fname
[index
] ) == 0 )
424 fprintf ( stderr
, "Config file error: q2 file is the same as q1 file\n" );
425 fprintf ( stderr
, "q1=%s\n", lib_array
[i
].q1_fname
[index
] );
426 fprintf ( stderr
, "q2=%s\n", lib_array
[i
].q2_fname
[index
] );
430 else if ( strcmp ( tabs
[0], "f" ) == 0 )
432 index
= lib_array
[i
].num_s_a_file
++;
433 strcpy ( lib_array
[i
].s_a_fname
[index
], tabs
[1] );
435 else if ( strcmp ( tabs
[0], "q" ) == 0 )
437 index
= lib_array
[i
].num_s_q_file
++;
438 strcpy ( lib_array
[i
].s_q_fname
[index
], tabs
[1] );
440 else if ( strcmp ( tabs
[0], "p" ) == 0 )
442 index
= lib_array
[i
].num_p_file
++;
443 strcpy ( lib_array
[i
].p_fname
[index
], tabs
[1] );
445 else if ( strcmp ( tabs
[0], "b" ) == 0 )
448 index
= lib_array
[i
].num_b_file
++;
449 strcpy ( lib_array
[i
].b_fname
[index
], tabs
[1] );
451 else if ( strcmp ( tabs
[0], "min_ins" ) == 0 )
453 lib_array
[i
].min_ins
= atoi ( tabs
[1] );
455 else if ( strcmp ( tabs
[0], "max_ins" ) == 0 )
457 lib_array
[i
].max_ins
= atoi ( tabs
[1] );
459 else if ( strcmp ( tabs
[0], "avg_ins" ) == 0 )
461 lib_array
[i
].avg_ins
= atoi ( tabs
[1] );
463 else if ( strcmp ( tabs
[0], "rd_len_cutoff" ) == 0 )
465 lib_array
[i
].rd_len_cutoff
= atoi ( tabs
[1] );
467 else if ( strcmp ( tabs
[0], "reverse_seq" ) == 0 )
469 lib_array
[i
].reverse
= atoi ( tabs
[1] );
471 else if ( strcmp ( tabs
[0], "asm_flags" ) == 0 )
473 lib_array
[i
].asm_flag
= atoi ( tabs
[1] );
475 else if ( strcmp ( tabs
[0], "rank" ) == 0 )
477 lib_array
[i
].rank
= atoi ( tabs
[1] );
479 else if ( strcmp ( tabs
[0], "pair_num_cutoff" ) == 0 )
481 lib_array
[i
].pair_num_cut
= atoi ( tabs
[1] );
483 else if ( strcmp ( tabs
[0], "rd_len_cutoff" ) == 0 )
485 lib_array
[i
].rd_len_cutoff
= atoi ( tabs
[1] );
487 else if ( strcmp ( tabs
[0], "map_len" ) == 0 )
489 lib_array
[i
].map_len
= atoi ( tabs
[1] );
493 for ( i
= 0; i
< num_libs
; i
++ )
495 if ( pe
[i
] && lib_array
[i
].avg_ins
== 0 )
497 fprintf ( stderr
, "Config file error: PE reads need avg_ins in [LIB] %d\n", i
+ 1 );
503 qsort ( &lib_array
[0], num_libs
, sizeof ( LIB_INFO
), cmp_lib
);
514 fprintf ( stderr
, "LIB(s) information:\n" );
516 for ( i
= 0; i
< num_libs
; i
++ )
518 fprintf ( stderr
, " [LIB] %d, avg_ins %d, reverse %d.\n", i
, lib_array
[i
].avg_ins
, lib_array
[i
].reverse
);
520 if ( lib_array
[i
].num_s_a_file
)
522 //printf("%d single fasta files\n",lib_array[i].num_s_a_file);
523 for ( j
= 0; j
< lib_array
[i
].num_s_a_file
; j
++ )
525 free ( ( void * ) lib_array
[i
].s_a_fname
[j
] );
528 free ( ( void * ) lib_array
[i
].s_a_fname
);
531 if ( lib_array
[i
].num_s_q_file
)
533 //printf("%d single fastq files\n",lib_array[i].num_s_q_file);
534 for ( j
= 0; j
< lib_array
[i
].num_s_q_file
; j
++ )
536 free ( ( void * ) lib_array
[i
].s_q_fname
[j
] );
539 free ( ( void * ) lib_array
[i
].s_q_fname
);
542 if ( lib_array
[i
].num_p_file
)
544 //printf("%d paired fasta files\n",lib_array[i].num_p_file);
545 for ( j
= 0; j
< lib_array
[i
].num_p_file
; j
++ )
547 free ( ( void * ) lib_array
[i
].p_fname
[j
] );
550 free ( ( void * ) lib_array
[i
].p_fname
);
553 if ( lib_array
[i
].num_a1_file
)
555 //printf("%d read1 fasta files\n",lib_array[i].num_a1_file);
556 for ( j
= 0; j
< lib_array
[i
].num_a1_file
; j
++ )
558 free ( ( void * ) lib_array
[i
].a1_fname
[j
] );
561 free ( ( void * ) lib_array
[i
].a1_fname
);
564 if ( lib_array
[i
].num_a2_file
)
566 //printf("%d read2 fasta files\n",lib_array[i].num_a2_file);
567 for ( j
= 0; j
< lib_array
[i
].num_a2_file
; j
++ )
569 free ( ( void * ) lib_array
[i
].a2_fname
[j
] );
572 free ( ( void * ) lib_array
[i
].a2_fname
);
575 if ( lib_array
[i
].num_q1_file
)
577 //printf("%d read1 fastq files\n",lib_array[i].num_q1_file);
578 for ( j
= 0; j
< lib_array
[i
].num_q1_file
; j
++ )
580 free ( ( void * ) lib_array
[i
].q1_fname
[j
] );
583 free ( ( void * ) lib_array
[i
].q1_fname
);
586 if ( lib_array
[i
].num_q2_file
)
588 //printf("%d read2 fastq files\n",lib_array[i].num_q2_file);
589 for ( j
= 0; j
< lib_array
[i
].num_q2_file
; j
++ )
591 free ( ( void * ) lib_array
[i
].q2_fname
[j
] );
594 free ( ( void * ) lib_array
[i
].q2_fname
);
597 if ( lib_array
[i
].num_b_file
)
599 //free the bam file name
600 //printf("%d bam files\n",lib_array[i].num_b_file);
601 for ( j
= 0; j
< lib_array
[i
].num_b_file
; j
++ )
602 { free ( ( void * ) lib_array
[i
].b_fname
[j
] ); }
604 free ( ( void * ) lib_array
[i
].b_fname
);
609 free ( ( void * ) lib_array
);
612 void alloc_pe_mem ( int gradsCounter
)
616 pes
= ( PE_INFO
* ) ckalloc ( gradsCounter
* sizeof ( PE_INFO
) );
624 free ( ( void * ) pes
);