3 readbwt.cpp Build index of short reads.
5 This program builds index of short reads for use of assembler BASE and some other programs.
7 # Copyright (C) 2015, The University of Hong Kong.
9 # This program is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License
11 # as published by the Free Software Foundation; either version 3
12 # of the License, or (at your option) any later version.
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 03110-1301, USA.
25 Change : Developped this product to build BWT of short reads.
26 This is the implementation of CX1 method.
30 Change : Add the implementation of CX1 method without GPU.
31 Add the loading of SOAPdenovo read libarary files, add qual_cutoff to set quality threshold.
32 Add a new output file for pair end reads with different insert sizes.
40 #include <sys/sysinfo.h> // memory size.
47 #include "lv2_cpu_sort.h"
48 #include "lv2_gpu_sort.h"
52 typedef unsigned int word
;
54 #define NUM_CPU_THREADS 24
55 //#define THREADS_PER_BLOCK 256 // GPU
57 // do not modify below
58 #define BITS_PER_WORD 32 //used for quality.
59 #define BITS_PER_CHAR 2
61 #define BITS_PER_QUAL 1 //used for quality.
63 #define CHARS_PER_WORD 16
66 #define PREFIX_LENGTH 8
67 #define SPECIAL_HANDLE_SUFFIX_LENGTH 3 // suffixes with length <= 3 will be handled specially
68 #define SPECIAL_HANDLE_SUFFIX_NUM_BUCKETS 125 // 5^3
69 #define NUM_BUCKETS 390625
70 #define PREFIX_MASK 0xFFFF
72 char dna_map
[256]; // map from ACGT to 0123
73 char dna_char
[5] = {'A','C','G','T','$'};
74 static char tabs
[2][1024];
82 int number_cpu_threads
= 24;
84 #define SENTINEL_INT 4 // '$'
86 //**********************************define structures start
88 struct section_data_t
{
90 struct global_data_t
*parent
;
91 int64 section_start
, section_end
; // start and end read ID ('end' is exlusive)
92 string bwt_buffer
[SPECIAL_HANDLE_SUFFIX_NUM_BUCKETS
];
94 int64 offset_diff_base
;
97 struct suffix_fetch_data_t
{
99 struct global_data_t
*parent
;
100 int bucket_start
, bucket_end
;
104 struct global_data_t
{
105 int node_bucket_start
, node_bucket_end
; // buckets to be processed on this node
110 uint64_t *cpu_sort_space
;
114 int words_per_suffix
;
115 int offset_num_bits
; // number of bits to represent 0...read_len
116 int offset_mask
; // -(1 << offset_num_bits)
118 int64 max_lv1_entries
;
119 int64 max_lv2_entries
;
121 int cur_special_handle_prefix
;
122 int64 num_lv2_to_output
;
125 int *buckets
, *section_buckets
[ NUM_CPU_THREADS
]; // buckets (aggregated for all read; for each section). TODO int64?
126 pthread_t threads
[ NUM_CPU_THREADS
];
127 struct section_data_t locals
[ NUM_CPU_THREADS
];
128 struct suffix_fetch_data_t suffix_fetches
[ NUM_CPU_THREADS
- 1 ];
129 int lv1_bucket_start
, lv1_bucket_end
; // 'end' is exclusive
130 int lv2_bucket_start
, lv2_bucket_end
; // 'end' is exclusive
131 int64 lv2_offset_start_index
;
132 int64 lv2_suffixes_row_width
;
134 word
*sort_indexes
; //permutation.
135 word
*sort_indexes_to_output
;
136 unsigned int *lv2_read_id
;
137 unsigned int *lv2_read_id_to_output
;
139 word
*lv2_suffixes_to_output
;
143 vector
<int64
> v_large_diff
;
144 pthread_spinlock_t large_diff_lock
;
147 typedef struct lib_info
154 int rd_len_cutoff
; //read length cutoff
161 //whether last read is read1 in pair
162 int paired
; // 0 -- single; 1 -- read1; 2 -- read2;
178 int num_p_file
; //fasta only
186 char ** b_fname
; //the name of the bam file
187 int num_b_file
; //the number of the bam file
191 //***************** define structure finished.
193 //***************** Initialize start.
194 //initial dna_map, here N is treated as G.
195 void init_dna_map() {
196 for (int i
='A';i
<='Z';++i
) dna_map
[i
] = 2; // G
197 for (int i
='a';i
<='z';++i
) dna_map
[i
] = 2; // G
198 dna_map
['A'] = dna_map
['a'] = 0;
199 dna_map
['C'] = dna_map
['c'] = 1;
200 dna_map
['G'] = dna_map
['g'] = 2;
201 dna_map
['T'] = dna_map
['t'] = 3;
202 dna_map
['N'] = dna_map
['n'] = 2; // G
205 static boolean
splitColumn ( char * line
)
207 int len
= strlen ( line
);
213 if ( line
[i
] >= 32 && line
[i
] <= 126 && line
[i
] != '=' )
216 while ( i
< len
&& line
[i
] >= 32 && line
[i
] <= 126 && line
[i
] != '=' )
218 tabs
[tabs_n
][j
++] = line
[i
];
222 tabs
[tabs_n
][j
] = '\0';
236 static int cmp_lib ( const void * a
, const void * b
)
239 A
= ( LIB_INFO
* ) a
;
240 B
= ( LIB_INFO
* ) b
;
242 if ( A
->avg_ins
> B
->avg_ins
)
244 if ( A
->avg_ins
== B
->avg_ins
)
249 void scan_libInfo ( char * libfile
, int *lib_num
, int *read_len
)
255 FILE* fp
= fopen ( libfile
, "r" );
259 while ( fgets ( line
, 1024, fp
) )
263 if ( strcmp ( line
, "[LIB]" ) == 0 )
269 flag
= splitColumn ( line
);
274 if ( strcmp ( tabs
[0], "max_rd_len" ) == 0 || strcmp ( tabs
[0], "max_read_len" ) == 0)
275 maxReadLen
= atoi ( tabs
[1] );
281 fprintf ( stderr
, "Config file error: no [LIB] in file\n" );
285 *read_len
= maxReadLen
;
287 //count file numbers of each type
288 lib_array
= ( LIB_INFO
* ) calloc (1, num_libs
* sizeof ( LIB_INFO
) );
289 pe
= ( boolean
* ) calloc (1, num_libs
* sizeof ( boolean
) );
291 for ( i
= 0; i
< num_libs
; i
++ )
293 lib_array
[i
].qual
= 74;
295 lib_array
[i
].avg_ins
= 0;
296 lib_array
[i
].asm_flag
= 3;
297 lib_array
[i
].rank
= 0;
298 lib_array
[i
].pair_num_cut
= 0;
299 lib_array
[i
].rd_len_cutoff
= 0;
300 lib_array
[i
].map_len
= 0;
301 lib_array
[i
].num_s_a_file
= 0;
302 lib_array
[i
].num_s_q_file
= 0;
303 lib_array
[i
].num_p_file
= 0;
304 lib_array
[i
].num_a1_file
= 0;
305 lib_array
[i
].num_a2_file
= 0;
306 lib_array
[i
].num_q1_file
= 0;
307 lib_array
[i
].num_q2_file
= 0;
308 lib_array
[i
].num_b_file
= 0; //init
315 while ( fgets ( line
, 1024, fp
) )
320 if ( strcmp ( line
, "[LIB]" ) == 0 )
327 flag
= splitColumn ( line
);
334 if ( strcmp ( tabs
[0], "f1" ) == 0 )
336 lib_array
[i
].num_a1_file
++;
339 else if ( strcmp ( tabs
[0], "q1" ) == 0 )
341 lib_array
[i
].num_q1_file
++;
344 else if ( strcmp ( tabs
[0], "f2" ) == 0 )
346 lib_array
[i
].num_a2_file
++;
349 else if ( strcmp ( tabs
[0], "q2" ) == 0 )
351 lib_array
[i
].num_q2_file
++;
354 else if ( strcmp ( tabs
[0], "f" ) == 0 )
356 lib_array
[i
].num_s_a_file
++;
358 else if ( strcmp ( tabs
[0], "q" ) == 0 )
360 lib_array
[i
].num_s_q_file
++;
362 else if ( strcmp ( tabs
[0], "p" ) == 0 )
364 lib_array
[i
].num_p_file
++;
367 else if ( strcmp ( tabs
[0], "b" ) == 0 ) // the bam file
369 lib_array
[i
].num_b_file
++;
374 //allocate memory for filenames
375 for ( i
= 0; i
< num_libs
; i
++ )
377 if ( lib_array
[i
].num_a2_file
!= lib_array
[i
].num_a1_file
)
379 fprintf ( stderr
, "Config file error: the number of mark \"f1\" is not the same as \"f2\"!\n" );
383 if ( lib_array
[i
].num_q2_file
!= lib_array
[i
].num_q1_file
)
385 fprintf ( stderr
, "Config file error: the number of mark \"q1\" is not the same as \"q2\"!\n" );
389 if ( lib_array
[i
].num_s_a_file
)
391 lib_array
[i
].s_a_fname
= ( char ** ) calloc (1, lib_array
[i
].num_s_a_file
* sizeof ( char * ) );
393 for ( j
= 0; j
< lib_array
[i
].num_s_a_file
; j
++ )
395 lib_array
[i
].s_a_fname
[j
] = ( char * ) calloc (1, 1024 * sizeof ( char ) );
399 if ( lib_array
[i
].num_s_q_file
)
401 lib_array
[i
].s_q_fname
= ( char ** ) calloc (1, lib_array
[i
].num_s_q_file
* sizeof ( char * ) );
403 for ( j
= 0; j
< lib_array
[i
].num_s_q_file
; j
++ )
405 lib_array
[i
].s_q_fname
[j
] = ( char * ) calloc (1, 1024 * sizeof ( char ) );
409 if ( lib_array
[i
].num_p_file
)
411 lib_array
[i
].p_fname
= ( char ** ) calloc (1, lib_array
[i
].num_p_file
* sizeof ( char * ) );
413 for ( j
= 0; j
< lib_array
[i
].num_p_file
; j
++ )
415 lib_array
[i
].p_fname
[j
] = ( char * ) calloc (1, 1024 * sizeof ( char ) );
419 if ( lib_array
[i
].num_a1_file
)
421 lib_array
[i
].a1_fname
= ( char ** ) calloc (1, lib_array
[i
].num_a1_file
* sizeof ( char * ) );
423 for ( j
= 0; j
< lib_array
[i
].num_a1_file
; j
++ )
425 lib_array
[i
].a1_fname
[j
] = ( char * ) calloc (1, 1024 * sizeof ( char ) );
429 if ( lib_array
[i
].num_a2_file
)
431 lib_array
[i
].a2_fname
= ( char ** ) calloc (1, lib_array
[i
].num_a2_file
* sizeof ( char * ) );
433 for ( j
= 0; j
< lib_array
[i
].num_a2_file
; j
++ )
435 lib_array
[i
].a2_fname
[j
] = ( char * ) calloc (1, 1024 * sizeof ( char ) );
439 if ( lib_array
[i
].num_q1_file
)
441 lib_array
[i
].q1_fname
= ( char ** ) calloc (1, lib_array
[i
].num_q1_file
* sizeof ( char * ) );
443 for ( j
= 0; j
< lib_array
[i
].num_q1_file
; j
++ )
445 lib_array
[i
].q1_fname
[j
] = ( char * ) calloc (1, 1024 * sizeof ( char ) );
449 if ( lib_array
[i
].num_q2_file
)
451 lib_array
[i
].q2_fname
= ( char ** ) calloc (1, lib_array
[i
].num_q2_file
* sizeof ( char * ) );
453 for ( j
= 0; j
< lib_array
[i
].num_q2_file
; j
++ )
455 lib_array
[i
].q2_fname
[j
] = ( char * ) calloc (1, 1024 * sizeof ( char ) );
459 if ( lib_array
[i
].num_b_file
) //allot memory for bam file name
461 lib_array
[i
].b_fname
= ( char ** ) calloc (1, lib_array
[i
].num_b_file
* sizeof ( char * ) );
463 for ( j
= 0; j
< lib_array
[i
].num_b_file
; j
++ )
464 { lib_array
[i
].b_fname
[j
] = ( char * ) calloc (1, 1024 * sizeof ( char ) ); }
469 for ( i
= 0; i
< num_libs
; i
++ )
471 lib_array
[i
].num_s_a_file
= 0;
472 lib_array
[i
].num_s_q_file
= 0;
473 lib_array
[i
].num_p_file
= 0;
474 lib_array
[i
].num_a1_file
= 0;
475 lib_array
[i
].num_a2_file
= 0;
476 lib_array
[i
].num_q1_file
= 0;
477 lib_array
[i
].num_q2_file
= 0;
478 lib_array
[i
].num_b_file
= 0; //init
484 while ( fgets ( line
, 1024, fp
) )
489 if ( strcmp ( line
, "[LIB]" ) == 0 )
496 flag
= splitColumn ( line
);
503 if ( strcmp ( tabs
[0], "f1" ) == 0 )
505 index
= lib_array
[i
].num_a1_file
++;
506 strcpy ( lib_array
[i
].a1_fname
[index
], tabs
[1] );
508 else if ( strcmp ( tabs
[0], "q1" ) == 0 )
510 index
= lib_array
[i
].num_q1_file
++;
511 strcpy ( lib_array
[i
].q1_fname
[index
], tabs
[1] );
513 else if ( strcmp ( tabs
[0], "f2" ) == 0 )
515 index
= lib_array
[i
].num_a2_file
++;
516 strcpy ( lib_array
[i
].a2_fname
[index
], tabs
[1] );
518 if ( strcmp ( lib_array
[i
].a2_fname
[index
], lib_array
[i
].a1_fname
[index
] ) == 0 )
520 fprintf ( stderr
, "Config file error: f2 file is the same as f1 file\n" );
521 fprintf ( stderr
, "f1=%s\n", lib_array
[i
].a1_fname
[index
] );
522 fprintf ( stderr
, "f2=%s\n", lib_array
[i
].a2_fname
[index
] );
526 else if ( strcmp ( tabs
[0], "q2" ) == 0 )
528 index
= lib_array
[i
].num_q2_file
++;
529 strcpy ( lib_array
[i
].q2_fname
[index
], tabs
[1] );
531 if ( strcmp ( lib_array
[i
].q2_fname
[index
], lib_array
[i
].q1_fname
[index
] ) == 0 )
533 fprintf ( stderr
, "Config file error: q2 file is the same as q1 file\n" );
534 fprintf ( stderr
, "q1=%s\n", lib_array
[i
].q1_fname
[index
] );
535 fprintf ( stderr
, "q2=%s\n", lib_array
[i
].q2_fname
[index
] );
539 else if ( strcmp ( tabs
[0], "f" ) == 0 )
541 index
= lib_array
[i
].num_s_a_file
++;
542 strcpy ( lib_array
[i
].s_a_fname
[index
], tabs
[1] );
544 else if ( strcmp ( tabs
[0], "q" ) == 0 )
546 index
= lib_array
[i
].num_s_q_file
++;
547 strcpy ( lib_array
[i
].s_q_fname
[index
], tabs
[1] );
549 else if ( strcmp ( tabs
[0], "p" ) == 0 )
551 index
= lib_array
[i
].num_p_file
++;
552 strcpy ( lib_array
[i
].p_fname
[index
], tabs
[1] );
554 else if ( strcmp ( tabs
[0], "b" ) == 0 )
557 index
= lib_array
[i
].num_b_file
++;
558 strcpy ( lib_array
[i
].b_fname
[index
], tabs
[1] );
560 else if ( strcmp ( tabs
[0], "min_ins" ) == 0 )
562 lib_array
[i
].min_ins
= atoi ( tabs
[1] );
564 else if ( strcmp ( tabs
[0], "max_ins" ) == 0 )
566 lib_array
[i
].max_ins
= atoi ( tabs
[1] );
568 else if ( strcmp ( tabs
[0], "avg_ins" ) == 0 )
570 lib_array
[i
].avg_ins
= atoi ( tabs
[1] );
572 else if ( strcmp ( tabs
[0], "sd" ) == 0)
574 lib_array
[i
].sd
= atoi(tabs
[1]);
576 else if( strcmp ( tabs
[0], "qual_cutoff" ) == 0 )
578 lib_array
[i
].qual
= atoi(tabs
[1]);
580 else if ( strcmp ( tabs
[0], "rd_len_cutoff" ) == 0 )
582 lib_array
[i
].rd_len_cutoff
= atoi ( tabs
[1] );
584 else if ( strcmp ( tabs
[0], "reverse_seq" ) == 0 )
586 lib_array
[i
].reverse
= atoi ( tabs
[1] );
588 else if ( strcmp ( tabs
[0], "asm_flags" ) == 0 )
590 lib_array
[i
].asm_flag
= atoi ( tabs
[1] );
592 else if ( strcmp ( tabs
[0], "rank" ) == 0 )
594 lib_array
[i
].rank
= atoi ( tabs
[1] );
596 else if ( strcmp ( tabs
[0], "pair_num_cutoff" ) == 0 )
598 lib_array
[i
].pair_num_cut
= atoi ( tabs
[1] );
600 else if ( strcmp ( tabs
[0], "rd_len_cutoff" ) == 0 )
602 lib_array
[i
].rd_len_cutoff
= atoi ( tabs
[1] );
604 else if ( strcmp ( tabs
[0], "map_len" ) == 0 )
606 lib_array
[i
].map_len
= atoi ( tabs
[1] );
610 for ( i
= 0; i
< num_libs
; i
++ )
612 if ( pe
[i
] && lib_array
[i
].avg_ins
== 0 )
614 fprintf ( stderr
, "Config file error: PE reads need avg_ins in [LIB] %d\n", i
+ 1 );
619 if( pe
[i
] && lib_array
[i
].sd
== 0)
621 if(lib_array
[i
].max_ins
> 0 || lib_array
[i
].min_ins
> 0)
623 if( lib_array
[i
].max_ins
> 0 && lib_array
[i
].min_ins
> 0 )
624 lib_array
[i
].sd
= lib_array
[i
].max_ins
- lib_array
[i
].avg_ins
> lib_array
[i
].min_ins
- lib_array
[i
].avg_ins
? (lib_array
[i
].max_ins
- lib_array
[i
].avg_ins
)/3 : (lib_array
[i
].min_ins
- lib_array
[i
].avg_ins
)/3;
625 else if( lib_array
[i
].max_ins
> 0 )
626 lib_array
[i
].sd
= (lib_array
[i
].max_ins
- lib_array
[i
].avg_ins
)/3;
628 lib_array
[i
].sd
= (lib_array
[i
].min_ins
- lib_array
[i
].avg_ins
)/3;
630 lib_array
[i
].sd
= lib_array
[i
].avg_ins
* 0.05;
637 qsort ( &lib_array
[0], num_libs
, sizeof ( LIB_INFO
), cmp_lib
);
640 void free_libs (LIB_INFO
* lib_array
, int num_libs
)
648 fprintf ( stderr
, "LIB(s) information:\n" );
650 for ( i
= 0; i
< num_libs
; i
++ )
652 fprintf ( stderr
, " [LIB] %d, avg_ins %d, reverse %d.\n", i
, lib_array
[i
].avg_ins
, lib_array
[i
].reverse
);
654 if ( lib_array
[i
].num_s_a_file
)
656 //printf("%d single fasta files\n",lib_array[i].num_s_a_file);
657 for ( j
= 0; j
< lib_array
[i
].num_s_a_file
; j
++ )
659 free ( ( void * ) lib_array
[i
].s_a_fname
[j
] );
662 free ( ( void * ) lib_array
[i
].s_a_fname
);
665 if ( lib_array
[i
].num_s_q_file
)
667 //printf("%d single fastq files\n",lib_array[i].num_s_q_file);
668 for ( j
= 0; j
< lib_array
[i
].num_s_q_file
; j
++ )
670 free ( ( void * ) lib_array
[i
].s_q_fname
[j
] );
673 free ( ( void * ) lib_array
[i
].s_q_fname
);
676 if ( lib_array
[i
].num_p_file
)
678 //printf("%d paired fasta files\n",lib_array[i].num_p_file);
679 for ( j
= 0; j
< lib_array
[i
].num_p_file
; j
++ )
681 free ( ( void * ) lib_array
[i
].p_fname
[j
] );
684 free ( ( void * ) lib_array
[i
].p_fname
);
687 if ( lib_array
[i
].num_a1_file
)
689 //printf("%d read1 fasta files\n",lib_array[i].num_a1_file);
690 for ( j
= 0; j
< lib_array
[i
].num_a1_file
; j
++ )
692 free ( ( void * ) lib_array
[i
].a1_fname
[j
] );
695 free ( ( void * ) lib_array
[i
].a1_fname
);
698 if ( lib_array
[i
].num_a2_file
)
700 //printf("%d read2 fasta files\n",lib_array[i].num_a2_file);
701 for ( j
= 0; j
< lib_array
[i
].num_a2_file
; j
++ )
703 free ( ( void * ) lib_array
[i
].a2_fname
[j
] );
706 free ( ( void * ) lib_array
[i
].a2_fname
);
709 if ( lib_array
[i
].num_q1_file
)
711 //printf("%d read1 fastq files\n",lib_array[i].num_q1_file);
712 for ( j
= 0; j
< lib_array
[i
].num_q1_file
; j
++ )
714 free ( ( void * ) lib_array
[i
].q1_fname
[j
] );
717 free ( ( void * ) lib_array
[i
].q1_fname
);
720 if ( lib_array
[i
].num_q2_file
)
722 //printf("%d read2 fastq files\n",lib_array[i].num_q2_file);
723 for ( j
= 0; j
< lib_array
[i
].num_q2_file
; j
++ )
725 free ( ( void * ) lib_array
[i
].q2_fname
[j
] );
728 free ( ( void * ) lib_array
[i
].q2_fname
);
731 if ( lib_array
[i
].num_b_file
)
733 //free the bam file name
734 //printf("%d bam files\n",lib_array[i].num_b_file);
735 for ( j
= 0; j
< lib_array
[i
].num_b_file
; j
++ )
736 { free ( ( void * ) lib_array
[i
].b_fname
[j
] ); }
738 free ( ( void * ) lib_array
[i
].b_fname
);
743 free ( ( void * ) lib_array
);
745 //***************** Initialize finished.
747 //***************** for debugging only start
748 void debug_prefix( FILE* file
, word prefix
) {
749 for (int i
= PREFIX_LENGTH
- 1; i
>= 0; --i
) {
752 fprintf( file
, "%c", dna_char
[c
] );
754 fprintf( file
, "^r" );
757 void log(const char* format
, ...) { //add const to char* can remove warning.
759 va_start( args
, format
);
760 vfprintf( stderr
, format
, args
);
765 // for debugging only
766 void debug_word( FILE* file
, word w
) {
767 for (int i
= CHARS_PER_WORD
- 1; i
>= 0; --i
) {
768 int c
= (w
>>(BITS_PER_CHAR
*i
)) & CHAR_MASK
;
769 fprintf( file
, "%c", dna_char
[c
] );
773 // for debugging only
774 void dump_special_prefix(int key
) {
775 for (int i
= 0; i
< SPECIAL_HANDLE_SUFFIX_LENGTH
; ++i
) {
776 log("%c", dna_char
[key
/ (SPECIAL_HANDLE_SUFFIX_NUM_BUCKETS
/ 5)]);
777 key
= key
* 5 % SPECIAL_HANDLE_SUFFIX_NUM_BUCKETS
;
783 void dump_read(word
*read_p
, int words_per_read
, int read_len
) {
784 for (int i
= 0; i
< words_per_read
; ++i
) {
785 for (int j
= 0; j
< CHARS_PER_WORD
; ++j
) {
786 log("%c", dna_char
[(read_p
[i
] >> (CHARS_PER_WORD
- 1 - j
) * BITS_PER_CHAR
) & 3]);
787 if (i
* CHARS_PER_WORD
+ j
== read_len
- 1) {
795 void debug_suffix( word
* start
, struct global_data_t
*_data
) {
796 struct global_data_t
&globals
= *_data
;
797 for (int j
=0; j
<globals
.words_per_read
; ++j
) {
798 word w
= *(start
+ j
* globals
.lv2_suffixes_row_width
);
799 for (int i
= CHARS_PER_WORD
- 1; i
>= 0; --i
) {
800 int c
= (w
>>(BITS_PER_CHAR
*i
)) & CHAR_MASK
;
801 fprintf(stderr
, "%c", dna_char
[c
]);
805 //***************** for debugging only end
807 inline void* MallocAndCheck(size_t size_in_byte
,
808 const char *malloc_from_which_file
= __FILE__
,
809 int malloc_from_which_line
= __LINE__
) {
810 void *ptr
= malloc(size_in_byte
);
811 if (ptr
== NULL
&& size_in_byte
!= 0) {
812 log( "[ERROR] Ran out of memory while applying %llubytes\n", (unsigned long long)size_in_byte
);
813 log( "In file: %s, line %d\n", malloc_from_which_file
, malloc_from_which_line
);
814 log( "There may be errors as follows:\n");
815 log( "1) Not enough memory.\n");
816 log( "2) The ARRAY may be overrode.\n");
817 log( "3) The wild pointers.\n");
824 inline bool is_valid_special_prefix(int prefix_key
) {
826 if (prefix_key
% 5 != 4) { return false; }
827 for (int i
= 0, start
= 0; i
< SPECIAL_HANDLE_SUFFIX_LENGTH
; ++i
) {
828 if (prefix_key
% 5 != 4) {
835 return prefix_key
== 0;
838 void* preprocess_fill_buckets( void *_data
) {
839 struct section_data_t
&locals
= *( (struct section_data_t
*)_data
);
840 struct global_data_t
&globals
= *(locals
.parent
);
841 int section_id
= locals
.section_id
;
842 int *buckets
= globals
.section_buckets
[ section_id
];
844 int read_len
= globals
.read_len
;
845 int words_per_read
= globals
.words_per_read
;
846 int w_shift
= BITS_PER_WORD
- BITS_PER_CHAR
;
848 for ( int64 r
= locals
.section_start
; r
< locals
.section_end
; ++r
) {
849 word
*w_p
= globals
.packed_reads
+ ( words_per_read
* r
);
852 for ( int i
= 0; i
< PREFIX_LENGTH
- 1; ++i
) {
853 key
= key
* 5 + ( w
>> w_shift
); // replaced by this
856 for ( int i
= PREFIX_LENGTH
- 1; i
< read_len
; ++i
) {
857 if ( i
% CHARS_PER_WORD
== 0 ) // TODO optimize
859 key
= ( key
* 5 + ( w
>> w_shift
) ) % NUM_BUCKETS
; // replaced by this
863 int special_key
= (key
* 5 + SENTINEL_INT
) % (SPECIAL_HANDLE_SUFFIX_NUM_BUCKETS
* 5);
865 for ( int i
= PREFIX_LENGTH
- 1; i
>= SPECIAL_HANDLE_SUFFIX_LENGTH
; --i
) {
866 key
= ( key
* 5 + SENTINEL_INT
) % NUM_BUCKETS
; // replaced by this
870 // special handle buckets
871 for ( int i
= 0; i
< SPECIAL_HANDLE_SUFFIX_LENGTH
; ++i
) {
872 int index
= special_key
% SPECIAL_HANDLE_SUFFIX_NUM_BUCKETS
;
873 assert(is_valid_special_prefix(index
));
874 assert(special_key
/ SPECIAL_HANDLE_SUFFIX_NUM_BUCKETS
!= 4);
875 locals
.bwt_buffer
[index
].push_back(dna_char
[ special_key
/ SPECIAL_HANDLE_SUFFIX_NUM_BUCKETS
]);
876 special_key
= (special_key
* 5 + SENTINEL_INT
) % (SPECIAL_HANDLE_SUFFIX_NUM_BUCKETS
* 5);
883 void* lv1_fetch_offsets( void *_data
) {
884 struct section_data_t
&locals
= *( (struct section_data_t
*)_data
);
885 struct global_data_t
&globals
= *(locals
.parent
);
887 const int bucket_start
= globals
.lv1_bucket_start
;
888 const int bucket_end
= globals
.lv1_bucket_end
;
890 const int section_start
= locals
.section_start
;
891 const int section_end
= locals
.section_end
;
893 int read_len
= globals
.read_len
;
894 int words_per_read
= globals
.words_per_read
;
895 int w_shift
= BITS_PER_WORD
- BITS_PER_CHAR
;
896 int64
*offset_pos
= locals
.offset_pos
;
898 int id_shift
= globals
.offset_num_bits
;
899 int64
*recent_offsets
= (int64
*) malloc( NUM_BUCKETS
* sizeof( int64
) ); // for calculating differences
900 int64 recent_base
= ( locals
.section_start
<< id_shift
); // offset of first read first char
901 for ( int b
= bucket_start
; b
< bucket_end
; ++b
) {
902 recent_offsets
[b
] = recent_base
;
904 locals
.offset_diff_base
= recent_base
;
906 // scan all reads and record offsets
907 for ( int64 r
= section_start
; r
< section_end
; ++r
) {
908 word
*w_p
= globals
.packed_reads
+ ( words_per_read
* r
);
911 // int prev_char = SENTINEL_INT;
912 for ( int i
= 0; i
< PREFIX_LENGTH
- 1; ++i
) {
913 // key = ((key<<BITS_PER_CHAR) | (w>>w_shift));
914 key
= key
* 5 + ( w
>> w_shift
); // replaced by this
917 for ( int i
= PREFIX_LENGTH
- 1; i
< read_len
; ++i
) {
918 if ( i
% CHARS_PER_WORD
== 0 ) // TODO optimize
920 // key = (((key<<BITS_PER_CHAR)&PREFIX_MASK)|(w>>w_shift));
921 key
= ( key
* 5 + ( w
>> w_shift
) ) % NUM_BUCKETS
; // replaced by this
923 // check if key within range
924 // TODO 0x80000000 hardcode
925 // this condition is equivalent to bucket_start <= key < bucket_end
926 if ( (( key
- bucket_start
) ^ ( key
- bucket_end
)) & 0x80000000 ) {
927 // read ID = r, within-read offset = i - PREFIX_LENGTH + 1
928 int64 true_offset
= ( r
<< id_shift
) | ( i
- PREFIX_LENGTH
+ 1 );
929 int64 diff_offset
= true_offset
- recent_offsets
[ key
];
931 if ( diff_offset
< 0 ) { // TODO remove this assertion
932 log("ERROR: offset diff negative (%lld %lld)! ABORT.\n", recent_offsets
[key
], true_offset
);
935 if ( diff_offset
> 2147483647 ) {
936 pthread_spin_lock(&globals
.large_diff_lock
);
937 diff_offset
= -1 - (int64
)globals
.v_large_diff
.size();
938 globals
.v_large_diff
.push_back(true_offset
);
939 pthread_spin_unlock(&globals
.large_diff_lock
);
941 globals
.lv1_offsets
[ offset_pos
[key
]++ ] = diff_offset
;
942 recent_offsets
[ key
] = true_offset
;
945 for ( int i
= PREFIX_LENGTH
- 1; i
>= SPECIAL_HANDLE_SUFFIX_LENGTH
; --i
) {
946 key
= ( key
* 5 + SENTINEL_INT
) % NUM_BUCKETS
; // replaced by this
947 // check if key within range
948 // TODO 0x80000000 hardcode
949 // this condition is equivalent to bucket_start <= key < bucket_end
950 if ( (( key
- bucket_start
) ^ ( key
- bucket_end
)) & 0x80000000 ) {
951 // read ID = r, within-read offset = read_len - i
952 int64 true_offset
= ( r
<< id_shift
) | ( read_len
- i
);
953 int64 diff_offset
= true_offset
- recent_offsets
[ key
];
955 if ( diff_offset
< 0 ) { // TODO remove this assertion
956 log("ERROR: offset diff negative (%lld %lld)! ABORT.\n", recent_offsets
[key
], true_offset
);
959 if ( diff_offset
> 2147483647 ) {
960 pthread_spin_lock(&globals
.large_diff_lock
);
961 diff_offset
= -1 - (int64
)globals
.v_large_diff
.size();
962 globals
.v_large_diff
.push_back(true_offset
);
963 pthread_spin_unlock(&globals
.large_diff_lock
);
965 globals
.lv1_offsets
[ offset_pos
[key
]++ ] = diff_offset
;
966 recent_offsets
[ key
] = true_offset
;
972 free( recent_offsets
);
977 void* lv2_fetch_suffixes( void *_data
){
978 struct suffix_fetch_data_t
&fetch
= *( (struct suffix_fetch_data_t
*)_data
);
979 struct global_data_t
&globals
= *(fetch
.parent
);
981 int read_len
= globals
.read_len
;
982 int offset_num_bits
= 1;
983 while ((1 << offset_num_bits
) - 1 < read_len
) {
986 int words_per_read
= globals
.words_per_read
;
987 int words_per_suffix
= globals
.words_per_suffix
;
988 const int bucket_start
= fetch
.bucket_start
;
989 const int bucket_end
= fetch
.bucket_end
;
990 word
*packed_reads
= globals
.packed_reads
;
992 int id_shift
= globals
.offset_num_bits
; // offsets are in the form [read_id][read_offset]; id_shift = # bits for read_offset
993 int read_offset_mask
= globals
.offset_mask
;
995 word
*suffix_p
= globals
.lv2_suffixes
+ fetch
.index_start
;
996 int num_suffixes
= fetch
.index_start
;
998 int *offset_p
= globals
.lv1_offsets
+ globals
.lv2_offset_start_index
+ fetch
.index_start
;
999 for ( int b
= bucket_start
; b
< bucket_end
; ++b
) {
1001 for ( int section
= 0; section
< globals
.num_cpu_threads
; ++section
) {
1003 int64 offset
= globals
.locals
[section
].offset_diff_base
; // reset per local bucket
1004 int64 num_in_section_bucket
= globals
.section_buckets
[section
][b
];
1005 while ( num_in_section_bucket
-- ) {
1006 if (*offset_p
< 0) {
1007 offset
= globals
.v_large_diff
[-(*offset_p
++) - 1];
1009 offset
+= *(offset_p
++); // remember we are using difference encoding
1011 // offset in the form [read_id][read_offset]
1012 int64 read_id
= offset
>> id_shift
;
1013 int read_offset
= offset
& read_offset_mask
;
1014 word
*read_p
= packed_reads
+ ( words_per_read
* read_id
);
1016 // find previous character (the char to be stored in BWT)
1018 if ( !read_offset
) { // special case
1019 prev_char
= SENTINEL_INT
;
1020 globals
.lv2_read_id
[ num_suffixes
] = read_id
;
1022 int prev_which_word
= ( read_offset
- 1 ) / CHARS_PER_WORD
;
1023 int prev_word_offset
= ( read_offset
- 1 ) % CHARS_PER_WORD
;
1024 word w
= *(read_p
+ prev_which_word
);
1025 prev_char
= ( w
>> ( BITS_PER_WORD
- BITS_PER_CHAR
- prev_word_offset
* 2 ) ) & CHAR_MASK
;
1028 // copy words of the suffix to the suffix pool
1029 int which_word
= read_offset
/ CHARS_PER_WORD
;
1030 int word_offset
= read_offset
% CHARS_PER_WORD
;
1031 word
*dest
= suffix_p
;
1032 word
*src
= read_p
+ which_word
;
1033 if ( !word_offset
) { // special case (word aligned), easy
1034 while ( which_word
< words_per_read
) {
1035 *dest
= *src
; // write out
1036 dest
+= globals
.lv2_suffixes_row_width
;
1040 } else { // not word-aligned
1041 int bit_shift
= read_offset
* 2;
1043 word d
= s
<< bit_shift
;
1045 while ( which_word
< words_per_read
) {
1047 d
|= s
>> (BITS_PER_WORD
- bit_shift
);
1048 *dest
= d
; // write out
1049 dest
+= globals
.lv2_suffixes_row_width
;
1054 *dest
= d
| ( 0xFFFFFFFF >> ( BITS_PER_WORD
- bit_shift
) ); // bit padding by '1's. TODO hard
1056 *(suffix_p
+ (globals
.lv2_suffixes_row_width
* (words_per_suffix
- 1) ) ) &= ~globals
.offset_mask
; // TODO hard
1057 *(suffix_p
+ (globals
.lv2_suffixes_row_width
* (words_per_suffix
- 1) ) ) |= read_offset
;
1059 globals
.sort_indexes
[ num_suffixes
] = ( prev_char
<< 29 ) | num_suffixes
; // TODO hard
1068 int get_base5_prefix(word item
) {
1069 item
>>= (CHARS_PER_WORD
- SPECIAL_HANDLE_SUFFIX_LENGTH
) * BITS_PER_CHAR
;
1070 static int base4_mask
= (1 << SPECIAL_HANDLE_SUFFIX_LENGTH
* BITS_PER_CHAR
) - 1;
1072 for (int i
= 0; i
< SPECIAL_HANDLE_SUFFIX_LENGTH
; ++i
) {
1073 ret
= ret
* 5 + (item
>> (SPECIAL_HANDLE_SUFFIX_LENGTH
- 1) * BITS_PER_CHAR
);
1074 item
= item
* 4 & base4_mask
;
1079 inline unsigned int mirror(unsigned int v
) {
1080 // swap consecutive pairs
1081 v
= ((v
>> 2) & 0x33333333) | ((v
& 0x33333333) << 2);
1083 v
= ((v
>> 4) & 0x0F0F0F0F) | ((v
& 0x0F0F0F0F) << 4);
1085 v
= ((v
>> 8) & 0x00FF00FF) | ((v
& 0x00FF00FF) << 8);
1086 // swap 2-byte long pairs
1087 v
= ( v
>> 16 ) | ( v
<< 16);
1091 inline void reverse_one_read(word
*read_p
, int words_per_read
, int last_shift
) {
1093 if (last_shift
> 0) {
1094 for (int i
= words_per_read
- 1; i
> 0; --i
) {
1095 read_p
[i
] >>= last_shift
;
1096 read_p
[i
] |= read_p
[i
- 1] << (BITS_PER_WORD
- last_shift
);
1098 read_p
[0] >>= last_shift
;
1099 read_p
[0] |= -(1 << (BITS_PER_WORD
- last_shift
));
1102 for (int i
= 0; i
< words_per_read
; ++i
) {
1103 read_p
[i
] = mirror(read_p
[i
]);
1107 void reverse_all_reads(global_data_t
&globals
) {
1108 omp_set_num_threads(globals
.num_cpu_threads
);
1109 int last_shift
= globals
.read_len
* BITS_PER_CHAR
% BITS_PER_WORD
;
1110 if (last_shift
!= 0) last_shift
= BITS_PER_WORD
- last_shift
;
1113 for (int64 i
= 0; i
< globals
.num_reads
; ++i
) {
1114 reverse_one_read(globals
.packed_reads
+ globals
.words_per_read
* i
, globals
.words_per_read
, last_shift
);
1117 int64 mid
= globals
.num_reads
/ 2;
1120 for (int64 i
= 0; i
< mid
; ++i
) {
1121 int64 i2
= globals
.num_reads
- 1 - i
;
1122 for (int j
= 0; j
< globals
.words_per_read
; ++j
) {
1123 swap(globals
.packed_reads
[i
* globals
.words_per_read
+ j
], globals
.packed_reads
[(i2
+ 1) * globals
.words_per_read
- 1 - j
]);
1127 if (globals
.num_reads
% 2 == 1) {
1128 for (int j
= 0; j
< globals
.words_per_read
/ 2; ++j
) {
1129 swap(globals
.packed_reads
[mid
* globals
.words_per_read
+ j
], globals
.packed_reads
[(mid
+ 1) * globals
.words_per_read
- 1 - j
]);
1134 void init_global_data(global_data_t
&globals
, int read_len
, int64 max_host_mem
, int number_cpu_threads
) {
1135 globals
.read_len
= read_len
;
1136 globals
.words_per_read
= ( read_len
+ ( CHARS_PER_WORD
- 1 ) ) / CHARS_PER_WORD
;
1137 globals
.num_reads
= 0;
1138 globals
.max_num_reads
= max_host_mem
* 0.8 / (globals
.words_per_read
* sizeof(word
));
1139 globals
.num_cpu_threads
= number_cpu_threads
;
1140 globals
.max_lv2_entries
= 0;
1142 globals
.offset_num_bits
= 1;
1143 while ((1 << globals
.offset_num_bits
) - 1 < read_len
) {
1144 ++globals
.offset_num_bits
;
1146 globals
.offset_mask
= (1 << globals
.offset_num_bits
) - 1;
1147 globals
.words_per_suffix
= (read_len
* BITS_PER_CHAR
+ globals
.offset_num_bits
+ BITS_PER_WORD
- 1) / BITS_PER_WORD
;
1149 log("Max host mem: %lld, max number of reads can be loaded: %lld\n", max_host_mem
, globals
.max_num_reads
);
1150 log("Words per read: %d, words per suffix: %d\n", globals
.words_per_read
, globals
.words_per_suffix
);
1153 void set_global_bucket(global_data_t
&globals
)
1155 for ( int t
= 0; t
< globals
.num_cpu_threads
; ++t
) {
1156 globals
.locals
[t
].parent
= &globals
;
1157 globals
.locals
[t
].section_id
= t
;
1159 for ( int t
= 0; t
< globals
.num_cpu_threads
- 1; ++t
) {
1160 globals
.suffix_fetches
[t
].parent
= &globals
;
1161 globals
.suffix_fetches
[t
].thread_id
= t
;
1164 globals
.buckets
= NULL
;
1165 for ( int t
= 0; t
< globals
.num_cpu_threads
; ++t
) {
1166 globals
.section_buckets
[t
] = NULL
;
1167 globals
.locals
[t
].offset_pos
= NULL
;
1171 int64 mem_packed_reads
= ( globals
.num_reads
+ 32 ) * globals
.words_per_read
* sizeof(word
); // +32 for safety
1172 globals
.packed_reads
= (word
*) realloc( globals
.packed_reads
, mem_packed_reads
);
1173 if ( ! globals
.packed_reads
) {
1174 fprintf( stderr
, "realloc error, quit\n" );
1177 log("Mem allocated for packed reads: %lld bytes\n", mem_packed_reads
);
1179 { // distribute reads among threads
1180 int64 each
= globals
.num_reads
/ globals
.num_cpu_threads
;
1181 for ( int t
= 0; t
< globals
.num_cpu_threads
- 1; ++t
) {
1182 globals
.locals
[t
].section_start
= t
* each
;
1183 globals
.locals
[t
].section_end
= (t
+ 1) * each
;
1185 globals
.locals
[ globals
.num_cpu_threads
- 1 ].section_start
= (globals
.num_cpu_threads
-1) * each
;
1186 globals
.locals
[ globals
.num_cpu_threads
- 1 ].section_end
= globals
.num_reads
;
1191 void set_global_after_reading_initial(global_data_t
&globals
, int64 max_host_mem
, int64 free_gpu_mem
) {
1192 for ( int t
= 0; t
< globals
.num_cpu_threads
; ++t
) {
1193 globals
.locals
[t
].parent
= &globals
;
1194 globals
.locals
[t
].section_id
= t
;
1196 for ( int t
= 0; t
< globals
.num_cpu_threads
- 1; ++t
) {
1197 globals
.suffix_fetches
[t
].parent
= &globals
;
1198 globals
.suffix_fetches
[t
].thread_id
= t
;
1201 globals
.buckets
= NULL
;
1202 for ( int t
= 0; t
< globals
.num_cpu_threads
; ++t
) {
1203 globals
.section_buckets
[t
] = NULL
;
1204 globals
.locals
[t
].offset_pos
= NULL
;
1208 int64 mem_packed_reads
= ( globals
.num_reads
+ 32 ) * globals
.words_per_read
* sizeof(word
); // +32 for safety
1209 globals
.packed_reads
= (word
*) realloc( globals
.packed_reads
, mem_packed_reads
);
1210 if ( ! globals
.packed_reads
) {
1211 fprintf( stderr
, "realloc error, quit\n" );
1214 log("Mem allocated for packed reads: %lld bytes\n", mem_packed_reads
);
1216 { // distribute reads among threads
1217 int64 each
= globals
.num_reads
/ globals
.num_cpu_threads
;
1218 for ( int t
= 0; t
< globals
.num_cpu_threads
- 1; ++t
) {
1219 globals
.locals
[t
].section_start
= t
* each
;
1220 globals
.locals
[t
].section_end
= (t
+ 1) * each
;
1222 globals
.locals
[ globals
.num_cpu_threads
- 1 ].section_start
= (globals
.num_cpu_threads
-1) * each
;
1223 globals
.locals
[ globals
.num_cpu_threads
- 1 ].section_end
= globals
.num_reads
;
1226 // preprocessing - compute memory limits
1228 free_gpu_mem
-= 1073741824; // remain 1G for b40c
1229 const int GPU_BYTES_PER_ENTRY
= 16; // key, value, and x2 for radix sort buffer
1230 int64 max_lv2_entries
= free_gpu_mem
/ GPU_BYTES_PER_ENTRY
;
1232 if (max_lv2_entries
>= (1 << 29)) {
1233 max_lv2_entries
= (1 << 29) - 1; // TODO hard
1235 globals
.max_lv2_entries
= max_lv2_entries
;
1236 log( "Free GPU MEM = %lld\n", free_gpu_mem
);
1238 int64 lv2_bytes_per_entry
= ( globals
.words_per_suffix
+ 1 + 1) * sizeof(word
); // read_id_table, permutation
1239 int64 mem_lv2
= lv2_bytes_per_entry
* max_lv2_entries
* 2; // *2 for double buffering
1241 log( "max host mem: %lld, mem_packed_reads %lld, mem_lv2 %lld, global.num_reads %lld", max_host_mem
, mem_packed_reads
, mem_lv2
, globals
.num_reads
* sizeof(char) * SPECIAL_HANDLE_SUFFIX_LENGTH
* 2);
1242 int64 avail_host_mem
= max_host_mem
- mem_packed_reads
- mem_lv2
- globals
.num_reads
* sizeof(char) * SPECIAL_HANDLE_SUFFIX_LENGTH
* 2;
1244 if(avail_host_mem
< 1024*1024)
1246 log( "Please set max_host_mem larger than: %lld\n", mem_packed_reads
+ mem_lv2
+ globals
.num_reads
* sizeof(char) * SPECIAL_HANDLE_SUFFIX_LENGTH
* 2 + 1024*1024);
1249 const int LV1_BYTES_PER_ENTRY
= 4; // 32-bit adjacent-difference representation
1250 int64 max_lv1_entries
= avail_host_mem
/ LV1_BYTES_PER_ENTRY
;
1251 globals
.max_lv1_entries
= max_lv1_entries
;
1253 log( "Avail host MEM = %lld\n", avail_host_mem
);
1254 log( "Max # of lv.1 entries = %lld\n", max_lv1_entries
);
1255 log( "Max # of lv.2 entries = %lld\n", max_lv2_entries
);
1259 globals
.cpu_sort_space
= (uint64_t*) MallocAndCheck(sizeof(uint64_t) * globals
.max_lv2_entries
, __FILE__
, __LINE__
); // as CPU memory is used to simulate GPU
1260 assert(globals
.cpu_sort_space
!= NULL
);
1263 // allocate memory for lv.1 and lv.2 arrays
1264 globals
.lv1_offsets
= (int*) malloc( max_lv1_entries
* sizeof(int) );
1265 globals
.lv2_suffixes
= (word
*) malloc( max_lv2_entries
* globals
.words_per_suffix
* sizeof(word
) );
1266 globals
.lv2_suffixes_row_width
= max_lv2_entries
;
1267 globals
.lv2_read_id
= (unsigned int*) malloc( max_lv2_entries
* sizeof(unsigned int) );
1268 globals
.sort_indexes
= (word
*) malloc( max_lv2_entries
* sizeof(word
) );
1270 globals
.lv2_suffixes_to_output
= (word
*) malloc( max_lv2_entries
* globals
.words_per_suffix
* sizeof(word
) );
1271 globals
.lv2_read_id_to_output
= (unsigned int*) malloc( max_lv2_entries
* sizeof(unsigned int) );
1272 globals
.sort_indexes_to_output
= (word
*) malloc( max_lv2_entries
* sizeof(word
) );
1274 pthread_spin_init(&globals
.large_diff_lock
, 0);
1277 void set_global_after_lv0(global_data_t
&globals
, int64 max_host_mem
, int64 free_gpu_mem
, uint64_t max_bucket_size
)
1279 int64 max_lv2_entries
;
1282 if(max_bucket_size
*1.2 < 2*1024*1024)
1283 max_lv2_entries
= 2*1024*1024;
1285 max_lv2_entries
= max_bucket_size
* 1.2;
1287 globals
.max_lv2_entries
= max_lv2_entries
;
1288 globals
.cpu_sort_space
= (uint64_t*) MallocAndCheck(sizeof(uint64_t) * globals
.max_lv2_entries
, __FILE__
, __LINE__
); // as CPU memory is used to simulate GPU
1289 assert(globals
.cpu_sort_space
!= NULL
);
1291 const int GPU_BYTES_PER_ENTRY
= 16; // key, value, and x2 for radix sort buffer
1292 max_lv2_entries
= (free_gpu_mem
- 1073741824) / GPU_BYTES_PER_ENTRY
;
1293 if (max_lv2_entries
>= (1 << 29)) {
1294 max_lv2_entries
= (1 << 29) - 1;
1296 if(max_lv2_entries
< (int64_t)max_bucket_size
)
1298 log ("Too many items for GPU sorting %lld > %lld! Please try larger GPU memory or use CPU version instead.", (int64_t)max_bucket_size
, max_lv2_entries
);
1301 globals
.max_lv2_entries
= max_lv2_entries
;
1303 int64 lv2_bytes_per_entry
= ( globals
.words_per_suffix
+ 1 + 1) * sizeof(word
); // read_id_table, permutation
1304 int64 mem_lv2
= lv2_bytes_per_entry
* max_lv2_entries
* 2; // *2 for double buffering
1306 int64 mem_packed_reads
= ( globals
.num_reads
+ 32 ) * globals
.words_per_read
* sizeof(word
); // +32 for safety
1308 log( "max host mem: %lld, mem_packed_reads %lld, mem_lv2 %lld, global.num_reads %lld", max_host_mem
, mem_packed_reads
, mem_lv2
, globals
.num_reads
* sizeof(char) * SPECIAL_HANDLE_SUFFIX_LENGTH
* 2);
1309 int64 avail_host_mem
= max_host_mem
- mem_packed_reads
- mem_lv2
- globals
.num_reads
* sizeof(char) * SPECIAL_HANDLE_SUFFIX_LENGTH
* 2;
1311 if(avail_host_mem
< 1024*1024)
1313 log( "Please set max_host_mem larger than: %lld\n", mem_packed_reads
+ mem_lv2
+ globals
.num_reads
* sizeof(char) * SPECIAL_HANDLE_SUFFIX_LENGTH
* 2 + 1024*1024);
1317 const int LV1_BYTES_PER_ENTRY
= 4; // 32-bit adjacent-difference representation
1318 int64 max_lv1_entries
= avail_host_mem
/ LV1_BYTES_PER_ENTRY
;
1319 globals
.max_lv1_entries
= max_lv1_entries
;
1321 log( "Avail host MEM = %lld\n", avail_host_mem
);
1322 log( "Max # of lv.1 entries = %lld\n", max_lv1_entries
);
1323 log( "Max # of lv.2 entries = %lld\n", max_lv2_entries
);
1326 globals
.lv1_offsets
= (int*) malloc( max_lv1_entries
* sizeof(int) );
1327 globals
.lv2_suffixes
= (word
*) malloc( max_lv2_entries
* globals
.words_per_suffix
* sizeof(word
) );
1328 globals
.lv2_suffixes_row_width
= max_lv2_entries
;
1329 globals
.lv2_read_id
= (unsigned int*) malloc( max_lv2_entries
* sizeof(unsigned int) );
1330 globals
.sort_indexes
= (word
*) malloc( max_lv2_entries
* sizeof(word
) );
1332 globals
.lv2_suffixes_to_output
= (word
*) malloc( max_lv2_entries
* globals
.words_per_suffix
* sizeof(word
) );
1333 globals
.lv2_read_id_to_output
= (unsigned int*) malloc( max_lv2_entries
* sizeof(unsigned int) );
1334 globals
.sort_indexes_to_output
= (word
*) malloc( max_lv2_entries
* sizeof(word
) );
1336 pthread_spin_init(&globals
.large_diff_lock
, 0);
1339 uint64_t lv0_fill_buckets(global_data_t
&globals
) {
1340 // preprocessing - fill buckets
1341 if (globals
.buckets
== NULL
)
1342 globals
.buckets
= (int*) malloc( NUM_BUCKETS
* sizeof(int) );
1343 for ( int t
= 0; t
< globals
.num_cpu_threads
; ++t
) {
1344 if (globals
.section_buckets
[t
] == NULL
)
1345 globals
.section_buckets
[t
] = (int*) malloc( NUM_BUCKETS
* sizeof(int) );
1346 memset(globals
.section_buckets
[t
], 0, sizeof(int) * NUM_BUCKETS
);
1348 log( "Preprocessing: filling buckets... " );
1349 for ( int t
= 1; t
< globals
.num_cpu_threads
; ++t
) {
1350 pthread_create( &globals
.threads
[t
], NULL
, preprocess_fill_buckets
, &globals
.locals
[t
] );
1352 preprocess_fill_buckets( &globals
.locals
[0] );
1353 for ( int t
= 1; t
< globals
.num_cpu_threads
; ++t
) {
1354 pthread_join( globals
.threads
[t
], NULL
);
1358 // sum up local buckets
1359 memset( globals
.buckets
, 0, NUM_BUCKETS
* sizeof(int) );
1360 for ( int t
= 0; t
< globals
.num_cpu_threads
; ++t
) {
1361 for ( int b
= 0; b
< NUM_BUCKETS
; ++b
)
1362 globals
.buckets
[b
] += globals
.section_buckets
[t
][b
];
1363 if (globals
.locals
[t
].offset_pos
== NULL
)
1364 globals
.locals
[t
].offset_pos
= (int64
*) malloc( NUM_BUCKETS
* sizeof( int64
) );
1365 memset(globals
.locals
[t
].offset_pos
, 0, sizeof(int64
) * NUM_BUCKETS
);
1368 int64_t max_buckets
=0;
1369 for (int i
= 0; i
< NUM_BUCKETS
; ++i
)
1371 if(globals
.max_lv2_entries
== 0)
1373 if(globals
.buckets
[i
] > max_buckets
)
1374 max_buckets
= globals
.buckets
[i
];
1376 if (globals
.buckets
[i
] > globals
.max_lv2_entries
) {
1377 fprintf(stderr
, "Too many items for GPU sorting! Bucket: %d, number of items: %d\n", i
, globals
.buckets
[i
]);
1382 if(globals
.max_lv2_entries
== 0)
1383 log ("Maximum Bucket size: %lld\n", max_buckets
);
1388 void* lv2_output_thread(void *_data
) {
1389 global_data_t
&globals
= *((global_data_t
*)_data
);
1390 log( "Writing output... \n" );
1391 for (int i
= 0; i
< globals
.num_lv2_to_output
; ++i
) {
1392 int c
= globals
.sort_indexes_to_output
[i
] >> 29; // TODO hard
1393 int cur_prefix
= get_base5_prefix( globals
.lv2_suffixes_to_output
[globals
.sort_indexes_to_output
[i
] & ((1 << 29) - 1)]);
1394 assert(cur_prefix
< SPECIAL_HANDLE_SUFFIX_NUM_BUCKETS
);
1395 while (cur_prefix
> globals
.cur_special_handle_prefix
) {
1396 dump_special_prefix(globals
.cur_special_handle_prefix
);
1397 for ( int t
= 0; t
< globals
.num_cpu_threads
; ++t
) {
1398 fwrite( globals
.locals
[t
].bwt_buffer
[globals
.cur_special_handle_prefix
].c_str(), sizeof(char),
1399 globals
.locals
[t
].bwt_buffer
[globals
.cur_special_handle_prefix
].size(), globals
.out_file
);
1400 globals
.locals
[t
].bwt_buffer
[globals
.cur_special_handle_prefix
].clear();
1403 ++globals
.cur_special_handle_prefix
;
1404 while (globals
.cur_special_handle_prefix
< SPECIAL_HANDLE_SUFFIX_NUM_BUCKETS
) {
1405 if (is_valid_special_prefix(globals
.cur_special_handle_prefix
)) {
1408 ++globals
.cur_special_handle_prefix
;
1411 fprintf(globals
.out_file
, "%c", dna_char
[ c
] );
1412 if ( c
== SENTINEL_INT
&& globals
.need_ridt
) {
1413 // TODO offsets are accumulated....
1414 unsigned int id
= globals
.lv2_read_id_to_output
[ globals
.sort_indexes_to_output
[i
] & 0x1FFFFFFF ]; // TODO hard
1415 fwrite( &id
, sizeof(unsigned int), 1, globals
.map_out_file
);
1421 void build_bwt(global_data_t
&globals
, const char *output_prefix
, bool need_ridt
= false) {
1423 for (int64 i
= globals
.num_reads
- 10; i
< globals
.num_reads
; ++i
) {
1424 dump_read(globals
.packed_reads
+ i
* globals
.words_per_read
, globals
.words_per_read
, globals
.read_len
);
1427 pthread_t output_thread
;
1428 bool output_started
= false;
1431 globals
.out_file
= fopen64( (string(output_prefix
) + ".f.bwt.ascii").c_str(), "w" );
1433 globals
.out_file
= fopen64( (string(output_prefix
) + ".r.bwt.ascii").c_str(), "w" );
1434 globals
.need_ridt
= need_ridt
;
1437 globals
.map_out_file
= fopen64( (string(output_prefix
) + ".ridt").c_str(), "wb" );
1438 fwrite( &globals
.num_reads
, sizeof(unsigned int), 1, globals
.map_out_file
);
1440 // level 1 loop - pick consecutive buckets
1442 globals
.cur_special_handle_prefix
= 0;
1443 while (!is_valid_special_prefix(globals
.cur_special_handle_prefix
)) {
1444 ++globals
.cur_special_handle_prefix
;
1446 globals
.lv1_bucket_start
= 0;
1447 while ( globals
.lv1_bucket_start
< NUM_BUCKETS
) {
1449 globals
.v_large_diff
.clear();
1450 int64 num_lv1
= globals
.buckets
[ globals
.lv1_bucket_start
];
1451 int temp_end
= globals
.lv1_bucket_start
+ 1; // remember that 'end' is exclusive
1452 while ( temp_end
< NUM_BUCKETS
&& num_lv1
+ globals
.buckets
[ temp_end
] <= globals
.max_lv1_entries
) {
1453 num_lv1
+= globals
.buckets
[ temp_end
++ ];
1455 globals
.lv1_bucket_end
= temp_end
; // remember that 'end' is exclusive
1457 log( "Lv.1 Round %d: buckets %d to %d, num items = %lld\n", lv1_round
,
1458 globals
.lv1_bucket_start
, globals
.lv1_bucket_end
-1, num_lv1
);
1459 // lv 1 bucket start & end found! fetch the offsets now
1461 // compute bucket positions for the offset arrays
1463 int64
*pos
= globals
.locals
[0].offset_pos
;
1464 pos
[globals
.lv1_bucket_start
] = 0;
1465 for ( int i
= globals
.lv1_bucket_start
+1; i
< globals
.lv1_bucket_end
; ++i
) {
1466 pos
[i
] = pos
[i
-1] + globals
.buckets
[i
-1];
1470 for ( int t
= 1; t
< globals
.num_cpu_threads
; ++t
) { // t > 0
1471 int64
*pos
= globals
.locals
[t
].offset_pos
+ globals
.lv1_bucket_start
;
1472 int64
*pre_pos
= globals
.locals
[t
-1].offset_pos
+ globals
.lv1_bucket_start
;
1473 int *pre_buckets
= globals
.section_buckets
[t
-1] + globals
.lv1_bucket_start
;
1474 for ( int i
= globals
.lv1_bucket_start
; i
< globals
.lv1_bucket_end
; ++i
) {
1475 *(pos
++) = *(pre_pos
++) + *(pre_buckets
++);
1479 log( " Fetching offsets... " );
1480 for ( int t
= 1; t
< globals
.num_cpu_threads
; ++t
) {
1481 pthread_create( &globals
.threads
[t
], NULL
, lv1_fetch_offsets
, &globals
.locals
[t
] );
1483 lv1_fetch_offsets( &globals
.locals
[0] );
1484 for ( int t
= 1; t
< globals
.num_cpu_threads
; ++t
) {
1485 pthread_join( globals
.threads
[t
], NULL
);
1487 log( "Done. Number of large diff: %lld\n", globals
.v_large_diff
.size());
1491 globals
.lv2_offset_start_index
= 0;
1492 globals
.lv2_bucket_start
= globals
.lv1_bucket_start
;
1493 while ( globals
.lv2_bucket_start
< globals
.lv1_bucket_end
) {
1495 int64 num_lv2
= globals
.buckets
[ globals
.lv2_bucket_start
];
1496 if (num_lv2
> globals
.max_lv2_entries
) { // TROUBLESOME CASES
1497 fprintf( stderr
, "ERROR: Single bucket too large for lv.2: %lld (", num_lv2
);
1498 debug_prefix( stderr
, globals
.lv2_bucket_start
);
1499 fprintf( stderr
, ")\n");
1500 globals
.lv2_bucket_start
++;
1501 globals
.lv2_offset_start_index
+= num_lv2
;
1504 int temp_end2
= globals
.lv2_bucket_start
+ 1; // remember that 'end' is exclusive
1505 while ( temp_end2
< globals
.lv1_bucket_end
&& num_lv2
+ globals
.buckets
[ temp_end2
] <= globals
.max_lv2_entries
) {
1506 num_lv2
+= globals
.buckets
[ temp_end2
++ ];
1508 globals
.lv2_bucket_end
= temp_end2
; // remember that 'end' is exclusive
1510 if (num_lv2
== 0) { // TROUBLESOME CASES
1511 fprintf( stderr
, "ERROR: Single bucket too large for lv.2: %d (", globals
.buckets
[temp_end2
] );
1512 debug_prefix( stderr
, temp_end2
);
1513 fprintf( stderr
, ")\n");
1514 globals
.lv2_bucket_start
= temp_end2
+ 1;
1515 globals
.lv2_offset_start_index
+= globals
.buckets
[temp_end2
];
1520 log(" Lv.2 Round %d: buckets %d to %d, num items = %lld\n", lv2_round
,
1521 globals
.lv2_bucket_start
, globals
.lv2_bucket_end
-1, num_lv2
);
1522 // lv 2 bucket start & end found! fetch the suffixes now
1524 // distribute to threads
1525 int64 each
= num_lv2
/ ( globals
.num_cpu_threads
- 1 );
1526 each
= !each
? 1 : each
; // min is 1
1527 int b_start
= globals
.lv2_bucket_start
;
1528 int64 cumulate_num_items
= 0;
1529 for (int t
= 0; t
< (globals
.num_cpu_threads
-1) - 1; ++t
) {
1530 globals
.suffix_fetches
[t
].bucket_start
= b_start
;
1531 globals
.suffix_fetches
[t
].index_start
= cumulate_num_items
;
1532 int64 num_items
= 0;
1533 while ( num_items
< each
&& b_start
< globals
.lv2_bucket_end
) {
1534 num_items
+= globals
.buckets
[ b_start
++ ];
1536 globals
.suffix_fetches
[t
].bucket_end
= b_start
;
1537 cumulate_num_items
+= num_items
;
1539 globals
.suffix_fetches
[ (globals
.num_cpu_threads
-1) - 1 ].bucket_start
= b_start
;
1540 globals
.suffix_fetches
[ (globals
.num_cpu_threads
-1) - 1 ].index_start
= cumulate_num_items
;
1541 globals
.suffix_fetches
[ (globals
.num_cpu_threads
-1) -1 ].bucket_end
= globals
.lv2_bucket_end
;
1543 memset( globals
.lv2_suffixes
, 0xFF, globals
.lv2_suffixes_row_width
* globals
.words_per_suffix
* sizeof(word
) ); // important: must fill all '1' bits
1544 log( " -> Fetching suffixes... " );
1546 for ( int t
= 0; t
< globals
.num_cpu_threads
-1; ++t
) {
1547 pthread_create( &globals
.threads
[t
], NULL
, lv2_fetch_suffixes
, &globals
.suffix_fetches
[t
] );
1549 for ( int t
= 0; t
< globals
.num_cpu_threads
-1; ++t
) {
1550 pthread_join( globals
.threads
[t
], NULL
);
1553 // suffixes fetched! now sort them with GPU
1554 /////////////////////////////////// GPU SORTING ////////////////////////////////
1555 log( "Sorting suffixes with CPU ... " );
1557 omp_set_num_threads(globals
.num_cpu_threads
- 1);
1558 lv2_cpu_sort(globals
.lv2_suffixes
, globals
.sort_indexes
, globals
.cpu_sort_space
, globals
.words_per_suffix
, globals
.lv2_suffixes_row_width
, num_lv2
);
1559 omp_set_num_threads(globals
.num_cpu_threads
);
1561 lv2_gpu_sort(globals
.lv2_suffixes
, globals
.sort_indexes
, globals
.words_per_suffix
, globals
.lv2_suffixes_row_width
, num_lv2
);
1563 /////// output here ///////
1564 if (output_started
) {
1565 pthread_join(output_thread
, NULL
);
1567 swap(globals
.sort_indexes
, globals
.sort_indexes_to_output
);
1568 swap(globals
.lv2_suffixes
, globals
.lv2_suffixes_to_output
);
1569 swap(globals
.lv2_read_id
, globals
.lv2_read_id_to_output
);
1570 globals
.num_lv2_to_output
= num_lv2
;
1571 pthread_create(&output_thread
, NULL
, lv2_output_thread
, (void*)&globals
);
1572 output_started
= true;
1575 globals
.lv2_bucket_start
= globals
.lv2_bucket_end
;
1576 globals
.lv2_offset_start_index
+= num_lv2
;
1577 log( "Round ends.\n" );
1580 globals
.lv1_bucket_start
= globals
.lv1_bucket_end
;
1581 log( "Lv.1 Round %d ends\n\n", lv1_round
);
1585 if (output_started
) {
1586 pthread_join(output_thread
, NULL
);
1588 // print last BWT segment (those chars corresponding to '$')
1589 while (globals
.cur_special_handle_prefix
< SPECIAL_HANDLE_SUFFIX_NUM_BUCKETS
) {
1590 if (is_valid_special_prefix(globals
.cur_special_handle_prefix
)) {
1591 dump_special_prefix(globals
.cur_special_handle_prefix
);
1592 for ( int t
= 0; t
< globals
.num_cpu_threads
; ++t
) {
1593 fwrite( globals
.locals
[t
].bwt_buffer
[globals
.cur_special_handle_prefix
].c_str(), sizeof(char),
1594 globals
.locals
[t
].bwt_buffer
[globals
.cur_special_handle_prefix
].size(), globals
.out_file
);
1595 globals
.locals
[t
].bwt_buffer
[globals
.cur_special_handle_prefix
].clear();
1598 ++globals
.cur_special_handle_prefix
;
1601 fclose( globals
.out_file
);
1603 fclose( globals
.map_out_file
);
1606 void read_input_file( struct global_data_t
&globals
, char* filename
) {
1607 /////////////////// READ INPUT (FASTQ) /////////////////////
1608 int read_len
= globals
.read_len
;
1609 int words_per_read
= globals
.words_per_read
;
1610 word
*packed_reads
= NULL
;
1611 int64 num_reads
= 0;
1613 #define READ_BUFFER_SIZE 4096
1614 gzFile in_file
= gzopen( filename
, "r" );
1616 char buffer
[ READ_BUFFER_SIZE
+ 10 ];
1617 packed_reads
= (word
*) malloc( globals
.max_num_reads
* words_per_read
* sizeof(word
) );
1618 // memset( packed_reads, 0xFF, globals.max_num_reads * words_per_read * sizeof(word) );
1619 word
*packed_reads_p
= packed_reads
;
1620 int last_shift
= read_len
% CHARS_PER_WORD
? ( CHARS_PER_WORD
- read_len
% CHARS_PER_WORD
) * BITS_PER_CHAR
: 0;
1622 #define REFILL_BUFFER( _in_file, _buffer, _num_bytes ) \
1624 (_num_bytes) = gzread((_in_file),(_buffer),READ_BUFFER_SIZE); \
1625 (_buffer)[(_num_bytes)] = 0; \
1628 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1630 word
*read_start_ptr
;
1631 int is_FASTQ
= ( buffer
[0] == '@' );
1632 char read_terminator
= is_FASTQ
? '+' : '>';
1633 while ( num_bytes
) {
1634 read_start_ptr
= packed_reads_p
;
1635 // consume read name @xxxxxxx
1636 p
= (char*) memchr( p
, '\n', num_bytes
- (p
-buffer
) );
1638 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1639 if ( !num_bytes
) break; // end of file
1640 p
= (char*) memchr( buffer
, '\n', num_bytes
);
1642 if ( !num_bytes
) break; // end of file
1644 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1647 // consume read sequence ACGT
1650 while ( *p
!= read_terminator
) { // FASTQ: + FASTA: >
1652 if ( index
% CHARS_PER_WORD
== 0 ) {
1654 *packed_reads_p
= w
;
1659 w
= (w
<< BITS_PER_CHAR
) | dna_map
[(unsigned char)( *p
) ]; // order: MSB to LSB
1664 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1665 if ( !num_bytes
) break;
1670 *packed_reads_p
= w
<< last_shift
;
1671 *packed_reads_p
|= ( 0xFFFFFFFF >> ( BITS_PER_WORD
- last_shift
) ); // TODO hard. add padding
1676 p
= (char*) memchr( p
, '\n', num_bytes
- (p
-buffer
) );
1678 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1679 p
= (char*) memchr( buffer
, '\n', num_bytes
);
1682 // consume base quality
1683 p
= (char*) memchr( p
, '\n', num_bytes
- (p
-buffer
) );
1685 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1686 if ( !num_bytes
) break; // end of file
1687 p
= (char*) memchr( buffer
, '\n', num_bytes
);
1692 if (index
== globals
.read_len
) {
1694 } else { // trouble...
1695 log( "ERROR: read length of read #%lld is %d\n", num_reads
, index
);
1696 packed_reads_p
= read_start_ptr
;
1699 if ( !num_bytes
) break; // end of file
1704 globals
.packed_reads
= packed_reads
;
1705 globals
.num_reads
= num_reads
;
1707 ////////////////////// END READING INPUT //////////////////////
1710 void read_single_file(FILE *fq
, char* filename
, int64
* read_num
, int read_len
, int threshold
)
1712 /////////////////// READ INPUT (FASTQ) /////////////////////
1713 int64 num_reads
= 0;
1715 #define READ_BUFFER_SIZE 4096
1716 gzFile in_file
= gzopen( filename
, "r" );
1717 //#define GZBUFFER_SIZE 65536
1718 // gzbuffer( infile, GZBUFFER_SIZE ); // needs zlib 1.2.4
1720 char buffer
[ READ_BUFFER_SIZE
+ 10 ];
1722 //memset( packed_quals, 0, 1 * words_per_qual * sizeof(word) );
1724 #define REFILL_BUFFER( _in_file, _buffer, _num_bytes ) \
1726 (_num_bytes) = gzread((_in_file),(_buffer),READ_BUFFER_SIZE); \
1727 (_buffer)[(_num_bytes)] = 0; \
1730 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1732 word
*read_start_ptr
;
1733 int is_FASTQ
= ( buffer
[0] == '@' );
1734 char read_terminator
= is_FASTQ
? '+' : '>';
1735 while ( num_bytes
) {
1736 read_start_ptr
= packed_reads_p
;
1737 // consume read name @xxxxxxx
1738 p
= (char*) memchr( p
, '\n', num_bytes
- (p
-buffer
) );
1740 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1741 if ( !num_bytes
) break; // end of file
1742 p
= (char*) memchr( buffer
, '\n', num_bytes
);
1744 if ( !num_bytes
) break; // end of file
1746 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1749 // consume read sequence ACGT
1752 while ( *p
!= read_terminator
) { // FASTQ: + FASTA: >
1754 if ( index
% CHARS_PER_WORD
== 0 ) {
1756 *packed_reads_p
= w
;
1761 w
= (w
<< BITS_PER_CHAR
) | dna_map
[(unsigned char)( *p
)]; // order: MSB to LSB
1766 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1767 if ( !num_bytes
) break;
1772 *packed_reads_p
= w
<< last_shift
;
1773 *packed_reads_p
|= ( 0xFFFFFFFF >> ( BITS_PER_WORD
- last_shift
) ); // TODO hard. add padding
1777 word
*packed_quals_p
= packed_quals
;
1779 p
= (char*) memchr( p
, '\n', num_bytes
- (p
-buffer
) );
1781 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1782 p
= (char*) memchr( buffer
, '\n', num_bytes
);
1786 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1787 if ( !num_bytes
) break;
1790 // consume base quality
1793 while( *p
!= '@' && *p
!= '\n')
1795 //fprintf(stderr, ";%c,%d", *p, w);
1796 if(index
% BITS_PER_WORD
== 0){
1798 *packed_quals_p
= w
;
1803 w
= (w
<< BITS_PER_QUAL
) | (*p
>= threshold
);
1807 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1808 if ( !num_bytes
) break;
1812 *packed_quals_p
= w
<< last_shift_qual
;
1813 //*packed_quals_p |= (0xFFFFFFFF >> ( BITS_PER_QUAL - last_shift_qual ));
1815 fwrite( packed_quals
, sizeof(word
), 1 * words_per_qual
, fq
);
1816 //memset( packed_quals, 0, 1 * words_per_qual * sizeof(word) );
1819 if (index
== read_len
) {
1821 } else { // trouble...
1822 log( "SINGLE READ ERROR: read length of read #%lld is %d\n", num_reads
, index
);
1823 packed_reads_p
= read_start_ptr
;
1826 if ( !num_bytes
) break; // end of file
1831 (*read_num
) = (*read_num
) + num_reads
;
1832 ////////////////////// END READING INPUT //////////////////////
1835 void read_pair_file(FILE *fq
, char* file1
, char* file2
, int64
* read_num
, int read_len
, int threshold
)
1837 /////////////////// READ INPUT (FASTQ) /////////////////////
1838 int64 num_reads
= 0;
1840 #define READ_BUFFER_SIZE 4096
1841 gzFile in_file
= gzopen( file1
, "r" );
1842 gzFile in_file2
= gzopen( file2
, "r" );
1843 //#define GZBUFFER_SIZE 65536
1844 // gzbuffer( infile, GZBUFFER_SIZE ); // needs zlib 1.2.4
1845 int num_bytes
= 0, num_bytes2
= 0;
1846 char buffer
[ READ_BUFFER_SIZE
+ 10 ];
1847 char buffer2
[ READ_BUFFER_SIZE
+ 10 ];
1849 // memset( packed_quals, 0, 1 * words_per_qual * sizeof(word) );
1850 #define REFILL_BUFFER( _in_file, _buffer, _num_bytes ) \
1852 (_num_bytes) = gzread((_in_file),(_buffer),READ_BUFFER_SIZE); \
1853 (_buffer)[(_num_bytes)] = 0; \
1856 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1857 REFILL_BUFFER( in_file2
, buffer2
, num_bytes2
);
1860 word
*read_start_ptr
;
1861 int is_FASTQ
= ( buffer
[0] == '@' );
1862 char read_terminator
= is_FASTQ
? '+' : '>';
1863 while ( num_bytes
&& num_bytes2
) {
1864 read_start_ptr
= packed_reads_p
;
1865 // consume read name @xxxxxxx
1866 p
= (char*) memchr( p
, '\n', num_bytes
- (p
-buffer
) );
1867 p2
= (char*) memchr( p2
, '\n', num_bytes2
- (p2
-buffer2
) );
1868 /*while (!p || !p2) {
1869 REFILL_BUFFER( in_file, buffer, num_bytes );
1870 REFILL_BUFFER( in_file2, buffer2, num_bytes2 );
1871 if ( !num_bytes || !num_bytes2) break; // end of file
1872 p = (char*) memchr( buffer, '\n', num_bytes );
1873 p2 = (char*) memchr( buffer2, '\n', num_bytes2 );
1877 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1880 p
= (char*) memchr( buffer
, '\n', num_bytes
);
1884 REFILL_BUFFER( in_file2
, buffer2
, num_bytes2
);
1887 p2
= (char*) memchr( buffer2
, '\n', num_bytes2
);
1889 if ( !num_bytes
|| !num_bytes2
) break; // end of file
1890 // consume read sequence ACGT
1893 //fprintf(stderr, "R1:");
1894 //if(*p == '@' || *p == '>')
1900 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1901 if ( !num_bytes
) break;
1907 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1908 if ( !num_bytes
) break;
1913 while ( *p
!= read_terminator
) { // FASTQ: + FASTA: >
1914 //fprintf(stderr, "%c", *p);
1916 if ( index
% CHARS_PER_WORD
== 0 ) {
1918 *packed_reads_p
= w
;
1923 w
= (w
<< BITS_PER_CHAR
) | dna_map
[(unsigned char)(*p
) ]; // order: MSB to LSB
1928 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
1929 if ( !num_bytes
) break;
1934 if (index
!= read_len
) {
1935 log( "ERROR: read length of read1 #%lld is %d\n", num_reads
, index
);
1940 log( " %d,%c", c
, *p
);
1947 //fprintf(stderr, "\nR2:");
1949 *packed_reads_p
= w
<< last_shift
;
1950 *packed_reads_p
|= ( 0xFFFFFFFF >> ( BITS_PER_WORD
- last_shift
) ); // TODO hard. add padding
1955 //if(*(p2) == '@' || *(p2) == '>')
1961 REFILL_BUFFER( in_file2
, buffer2
, num_bytes2
);
1962 if ( !num_bytes2
) break;
1969 REFILL_BUFFER( in_file2
, buffer2
, num_bytes2
);
1970 if ( !num_bytes2
) break;
1975 while ( *p2
!= read_terminator
) { // FASTQ: + FASTA: >
1976 //fprintf(stderr, "%c", *p2);
1978 if ( index
% CHARS_PER_WORD
== 0 ) {
1980 *packed_reads_p
= w
;
1985 w
= (w
<< BITS_PER_CHAR
) | dna_map
[(unsigned char) (*p2
) ]; // order: MSB to LSB
1990 REFILL_BUFFER( in_file2
, buffer2
, num_bytes2
);
1991 if ( !num_bytes2
) break;
1995 //fprintf(stderr, "\nq1:");
1997 *packed_reads_p
= w
<< last_shift
;
1998 *packed_reads_p
|= ( 0xFFFFFFFF >> ( BITS_PER_WORD
- last_shift
) ); // TODO hard. add padding
2001 if (index
!= read_len
) {
2002 log( "READ ERROR: read length of read2 #%lld is %d\n", num_reads
, index
);
2007 log( " %d,%c", c
, *p2
);
2015 word
*packed_quals_p1
= packed_quals
;
2016 word
*packed_quals_p2
= packed_quals
;
2018 p
= (char*) memchr( p
, '\n', num_bytes
- (p
-buffer
) );
2020 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
2021 p
= (char*) memchr( buffer
, '\n', num_bytes
);
2028 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
2029 if ( !num_bytes
) break;
2036 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
2037 if ( !num_bytes
) break;
2040 // consume base quality
2045 //fprintf(stderr, "%c", *p);
2046 if(index
% BITS_PER_WORD
== 0){
2048 *packed_quals_p1
= w
;
2053 w
= (w
<< BITS_PER_QUAL
) | (*p
>= threshold
);
2057 REFILL_BUFFER( in_file
, buffer
, num_bytes
);
2058 if ( !num_bytes
) break;
2063 //fprintf(stderr, "\nq2:");
2064 *packed_quals_p1
= w
<< last_shift_qual
;
2065 //*packed_quals_p1 |= (0xFFFFFFFF >> ( BITS_PER_QUAL - last_shift_qual ));
2067 fwrite( packed_quals
, sizeof(word
), 1 * words_per_qual
, fq
);
2068 //memset( packed_quals, 0, 1 * words_per_qual * sizeof(word) );
2070 p2
= (char*) memchr( p2
, '\n', num_bytes2
- (p2
-buffer2
) );
2072 REFILL_BUFFER( in_file2
, buffer2
, num_bytes2
);
2073 p2
= (char*) memchr( buffer2
, '\n', num_bytes2
);
2080 REFILL_BUFFER( in_file2
, buffer2
, num_bytes2
);
2081 if ( !num_bytes2
) break;
2088 REFILL_BUFFER( in_file2
, buffer2
, num_bytes2
);
2089 if ( !num_bytes2
) break;
2092 // consume base quality
2097 //fprintf(stderr, "%c", *p2);
2098 if(index
% BITS_PER_WORD
== 0){
2100 *packed_quals_p2
= w
;
2105 w
= (w
<< BITS_PER_QUAL
) | (*p2
>= threshold
);
2109 REFILL_BUFFER( in_file2
, buffer2
, num_bytes2
);
2110 if ( !num_bytes2
) break;
2115 //fprintf(stderr, "\n");
2116 *packed_quals_p2
= w
<< last_shift_qual
;
2117 //*packed_quals_p2 |= (0xFFFFFFFF >> ( BITS_PER_QUAL - last_shift_qual ));
2119 fwrite( packed_quals
, sizeof(word
), 1 * words_per_qual
, fq
);
2120 //memset( packed_quals, 0, 1 * words_per_qual * sizeof(word) );
2122 if (index
== read_len
) {
2124 } else { // trouble...
2125 log( "READ ERROR: read length of read #%lld is %d\n", num_reads
, index
);
2130 log( " %d,%c", c
, *p2
);
2136 packed_reads_p
= read_start_ptr
;
2139 if ( !num_bytes
) break; // end of file
2144 (*read_num
) = (*read_num
) + num_reads
;
2145 ////////////////////// END READING INPUT //////////////////////
2148 void lib2read( struct global_data_t
&globals
, char* lib_file
, char* prefix
)
2150 int num_libs
=0, i
, j
, read_length
= globals
.read_len
;
2151 int64 start_num
=0, g_num_reads
=0;
2153 g_packed_reads
= (word
*) malloc( globals
.max_num_reads
* globals
.words_per_read
* sizeof(word
) );
2154 //memset( g_packed_reads, 0, globals.max_num_reads * globals.words_per_read * sizeof(word) );
2155 last_shift
= globals
.read_len
% CHARS_PER_WORD
? ( CHARS_PER_WORD
- globals
.read_len
% CHARS_PER_WORD
) * BITS_PER_CHAR
: 0;
2156 packed_reads_p
= g_packed_reads
;
2159 words_per_qual
= ( globals
.read_len
+ ( BITS_PER_WORD
- 1 ) ) / BITS_PER_WORD
;
2160 packed_quals
= (word
*) malloc( 1 * words_per_qual
* sizeof(word
) );
2161 last_shift_qual
= globals
.read_len
% BITS_PER_WORD
? ( BITS_PER_WORD
- globals
.read_len
% BITS_PER_WORD
) * BITS_PER_QUAL
: 0;
2163 log("Starting load libraries...\n");
2164 scan_libInfo(lib_file
, &num_libs
, &read_length
);
2166 if(read_length
!= globals
.read_len
)
2168 log( "Ignore max read length in read library file\n");
2169 read_length
= globals
.read_len
;
2173 sprintf(name
, "%s.qual.bit", prefix
);
2174 FILE *fq
= fopen(name
, "w"); //output base quality as 0/1.
2176 sprintf(name
, "%s.index.peGrade", prefix
);
2177 FILE *fg
= fopen(name
, "w"); //output insert size grade.
2179 log( "Totally there are %d libs\n", num_libs
);
2180 for( i
=0; i
< num_libs
; i
++ )
2182 if(lib_array
[i
].asm_flag
!= 1 && lib_array
[i
].asm_flag
!= 3)
2184 if(lib_array
[i
].avg_ins
== 0)
2186 log( "lib %d has %d paired fastqs\n", i
, lib_array
[i
].num_q1_file
);
2187 start_num
= g_num_reads
;
2188 for( j
= 0; j
< lib_array
[i
].num_q1_file
; j
++ )
2190 log( "Loading file: %s ", lib_array
[i
].q1_fname
[j
]);
2191 read_pair_file(fq
, lib_array
[i
].q1_fname
[j
], lib_array
[i
].q2_fname
[j
], &g_num_reads
, globals
.read_len
, lib_array
[i
].qual
);
2192 log( ", Using %lld reads\n", g_num_reads
);
2194 for( j
= 0; j
< lib_array
[i
].num_a1_file
; j
++ )
2196 log( "Loading file: %s ", lib_array
[i
].a1_fname
[j
]);
2197 read_pair_file(fq
, lib_array
[i
].a1_fname
[j
], lib_array
[i
].a2_fname
[j
], &g_num_reads
, globals
.read_len
, lib_array
[i
].qual
);
2198 log( ", Using %lld reads\n", g_num_reads
);
2200 for( j
= 0; j
< lib_array
[i
].num_p_file
; j
++)
2202 log( "Loading file: %s ", lib_array
[i
].p_fname
[j
]);
2203 read_single_file(fq
, lib_array
[i
].p_fname
[j
], &g_num_reads
, globals
.read_len
, lib_array
[i
].qual
);
2204 log( ", Using %lld reads\n", g_num_reads
);
2207 if(g_num_reads
> start_num
)
2208 fprintf(fg
, "%lld %lld %d %d\n", start_num
, g_num_reads
, lib_array
[i
].avg_ins
, lib_array
[i
].sd
);
2211 log( "Starting Single-End libraries to reads...\n");
2212 for( i
=0; i
< num_libs
; i
++ )
2214 if(lib_array
[i
].asm_flag
!= 1 && lib_array
[i
].asm_flag
!= 3)
2216 log( "lib %d has %d single fastqs\n", i
, lib_array
[i
].num_s_q_file
);
2217 for( j
= 0; j
< lib_array
[i
].num_s_q_file
; j
++)
2219 read_single_file(fq
, lib_array
[i
].s_q_fname
[j
], &g_num_reads
, globals
.read_len
, lib_array
[i
].qual
);
2221 for( j
= 0; j
< lib_array
[i
].num_s_a_file
; j
++)
2222 read_single_file(fq
, lib_array
[i
].s_a_fname
[j
], &g_num_reads
, globals
.read_len
, lib_array
[i
].qual
);
2225 globals
.packed_reads
= g_packed_reads
;
2226 globals
.num_reads
= g_num_reads
;
2230 free_libs(lib_array
, num_libs
);
2231 free( packed_quals
);
2232 log("Loading library finished.\n");
2235 int main( int argc
, char** argv
) {
2236 struct sysinfo s_info
;
2241 log( "Usage:\n %s read_library read_len output_prefix [thread_num] [max_host_mem] 2> bwt.log\n", argv
[0] );
2242 log( "Input:\n 1) read_library has the same format to that of SOAPdenovo, except to set qual_cutoff to define high quality threshold\n 2) read length must be set and reads only with fixed length could be used.\n");
2244 log( "Usage:\n %s read_library read_len output_prefix [thread_num] [max_host_mem] [GPU_mem] 2> bwt.log\n", argv
[0] );
2245 log( "Input:\n 1) read_library has the same format to that of SOAPdenovo, except to set qual_cutoff to define high quality threshold\n 2) read length must be set and reads only with fixed length could be used.\n");
2247 log( "Output:\n output_prefix.f.bwt.ascii, output_prefix.r.bwt.ascii, output_prefix.ridt\n" );
2248 log("\nMachine Information:\n"
2249 "RAM: total %lld / free %lld\n"
2250 "Swap: total %lld / free %lld\n"
2251 "Number of CPU = %d\n",
2252 s_info
.totalram
, s_info
.freeram
,
2253 s_info
.totalswap
, s_info
.freeswap
,
2254 sysconf(_SC_NPROCESSORS_ONLN
)
2266 int read_len
= atoi(argv
[2]);
2267 char *output_prefix
= argv
[3];
2268 uint64_t gpu_mem
= 0; // = atoll(argv[4]);
2269 uint64_t max_host_mem
= s_info
.totalram
;
2270 int cpu_count
= sysconf(_SC_NPROCESSORS_ONLN
);
2274 number_cpu_threads
= atoi(argv
[4]);
2275 if(number_cpu_threads
> cpu_count
|| number_cpu_threads
== 0 || number_cpu_threads
> NUM_CPU_THREADS
)
2276 number_cpu_threads
= cpu_count
< NUM_CPU_THREADS
? cpu_count
: NUM_CPU_THREADS
;
2277 log("Using thread number: %d\n", number_cpu_threads
);
2279 number_cpu_threads
= cpu_count
< NUM_CPU_THREADS
? cpu_count
: NUM_CPU_THREADS
;
2280 log("Using thread number: %d\n", number_cpu_threads
);
2285 uint64_t set_host_mem
= atoll(argv
[5]);
2286 if(set_host_mem
> max_host_mem
|| set_host_mem
== 0)
2288 log("Use maximum host memory %lld\n", max_host_mem
);
2290 max_host_mem
= set_host_mem
;
2293 log("Using total RAM %lld\n", max_host_mem
);
2299 size_t free_gpu_mem
= get_gpu_mem();
2300 if(gpu_mem
< 100 || gpu_mem
> free_gpu_mem
)
2302 gpu_mem
= free_gpu_mem
;
2309 gpu_mem
= atoll(argv
[6]);
2311 uint64_t set_gpu_mem
= atoll(argv
[6]);
2312 if(set_gpu_mem
< 100 || set_gpu_mem
> gpu_mem
)
2314 log("Use Maximum GPU memory %lld.\n", gpu_mem
);
2316 gpu_mem
= set_gpu_mem
;
2317 log("Use Set GPU memory %lld.\n", gpu_mem
);
2322 log("Use maximum bucket size.\n");
2324 log("Use Maximum GPU memory %lld.\n", gpu_mem
);
2328 struct global_data_t globals
;
2330 init_global_data(globals
, read_len
, max_host_mem
, number_cpu_threads
);
2333 lib2read( globals
, argv
[1], output_prefix
);
2334 log("Number of reads: %lld\n", globals
.num_reads
);
2337 //set_global_after_reading(globals, max_host_mem);
2338 set_global_bucket(globals
);
2339 //level 1 fill buckets.
2340 uint64_t max_bucket_size
= lv0_fill_buckets(globals
);
2342 //set level2 memeory.
2343 set_global_after_lv0(globals
, max_host_mem
, gpu_mem
, max_bucket_size
);
2345 build_bwt(globals
, output_prefix
, true);
2347 log("Reverse all reads..");
2348 reverse_all_reads(globals
);
2351 lv0_fill_buckets(globals
);
2352 build_bwt(globals
, output_prefix
, false);
2354 log("All finished.\n");
2357 free( globals
.packed_reads
);
2358 free( globals
.buckets
);
2359 free( globals
.lv1_offsets
);
2360 free( globals
.lv2_suffixes
);
2361 free( globals
.lv2_read_id
);
2362 free( globals
.sort_indexes
);
2365 free(globals
.cpu_sort_space
);
2368 pthread_spin_destroy(&globals
.large_diff_lock
);
2370 for ( int t
= 0; t
< globals
.num_cpu_threads
; ++t
) {
2371 free( globals
.section_buckets
[t
] );
2372 free( globals
.locals
[t
].offset_pos
);