4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
23 int visual
= 0; // 1 for output some files , which are useful for visual
24 int * contig_index_array
= NULL
;
27 boolean fill
= 0; // 1 for output some files ,which are useful for the software "kgf"
// K-mer size.
int overlaplen = 23;
// Used to check whether -g (graph prefix) was set; no explicit initializer.
int inGraph;
// Not used.
long long n_ban;
// Number of reads.
long long n_solexa = 0;
// Not used.
long long prevNum = 0;
// Standard deviation (SD) of the insert size.
int ins_size_var = 20;
34 PE_INFO
* pes
= NULL
; //record the pe info in lib file
35 MEM_MANAGER
* rv_mem_manager
= NULL
;
36 MEM_MANAGER
* cn_mem_manager
= NULL
;
37 MEM_MANAGER
* arc_mem_manager
= NULL
;
// Number of end-kmers.
unsigned int num_vt = 0;
// Number of end-kmers after the new end-kmers have been added.
unsigned long long new_num_vt = 0;
// NOTE(review): presumably the routes found while filling gaps, capped by max_n_routes; confirm.
unsigned int **found_routes = NULL;
// Records the contig path while a gap is being filled.
unsigned int *so_far = NULL;
int max_n_routes = 10;
44 Kmer WORDFILTER
; //mask code for extracting Kmer info from raw data (two unsigned long long int)
45 unsigned int num_ed
= 0; //number of edges
46 unsigned int num_ctg
= 0; // num of contig
47 unsigned int num_ed_limit
; // the count of edge
48 unsigned int extraEdgeNum
; // the new count of edge after adding the new edge
49 EDGE
* edge_array
= NULL
; // used to record all the info of edge
50 VERTEX
* vt_array
= NULL
; // used to record the sequence info of the end-kmer
51 unsigned int * index_array
= NULL
; // used to translate the old contig index to the new contig index
52 CONTIG
* contig_array
= NULL
; // used to record all the info of contig
55 int weakPE
= 3; // the minimun weight requirement for the connection
56 int fillGap
= 0; // 1 for fill the gap after scaffold asm
58 long long arcCounter
; // record the num of the arc
59 MEM_MANAGER
* prearc_mem_manager
= NULL
;
60 MEM_MANAGER
** preArc_mem_managers
= NULL
;
// Max read length used for each LIB; LIBs are read one by one and a maxReadLen is set for each.
int maxReadLen = 0;
// Max length used for all reads.
int maxReadLen4all = 0;
// Min length used for all reads.
int minReadLen = 0;
// Max length of the name of a read or sequence.
int maxNameLen = 0;
65 ARC
** arcLookupTable
= NULL
;
66 long long * markersArray
= NULL
;
67 boolean deLowKmer
= 0; //remove the kmers which coverage are not bigger than deLowKmer
68 boolean deLowEdge
= 1; //remove the edges which coverage are not bigger than deLowEdge
69 long long newCntCounter
; // record the number of the new connection in one insert-size
70 long long discardCntCounter
;
71 boolean repsTie
= 0; //sovle tiny repeat or not
72 CONNECT
** cntLookupTable
= NULL
;
73 int num_libs
= 0; //number of LIBs in read config file
74 LIB_INFO
* lib_array
= NULL
; //store LIB's info into lib_array
75 int libNo
= 0; // the current number of lib
76 long long readNumBack
;
77 int gradsCounter
; //pair number in lib file
78 unsigned int ctg_short
= 0; //shortest contig for scaffolding
79 int thrd_num
= 8; //thread number
80 int cvgAvg
= 0; // the average coverage of contigs
81 KmerSet
** KmerSets
= NULL
; //KmerSet [i] for thread i
82 KmerSet
** KmerSetsPatch
= NULL
; //KmerSet for (k+1) mer
83 DARRAY
* readSeqInGap
= NULL
;
84 DARRAY
* gapSeqDarray
= NULL
;
86 boolean orig2new
; // 1 for re-arrange the contig index using the length
88 boolean maskRep
= 1; // 1 for masking repeat for scaffold asm , 0 for un-masking repeat.
// init_size = (ubyte8) ((double) initKmerSetSize * 1024.0f * 1024.0f * 1024.0f / (double) thrd_num / 24);
int initKmerSetSize = 0;
long known_genome_size = 0;
// K-mer size of the "Map" step.
int smallKmer = 0;
// For map: K - k.
int deltaKmer = 0;
float ins_var_idx = 1.5f;
// The current insert size.
int Insert_size = 0;
// 1: gz file; 0: normal file.
int COMPATIBLE_MODE = 0;
105 MEM_MANAGER
* edgeid_mem_manager
= NULL
;
106 unsigned int num_vtnew
= 0; //new vertex num
107 unsigned int kmer_cnew
= 0; //new kmer num
108 const int step
= 1; //step for multi kmer
112 unsigned int * edge_id
= NULL
; //edge id array
113 VERTEX
* vt_arraynew
= NULL
; //vertex array for k+1mer
115 KmerSet2
* KmerSetsNew
= NULL
; //kmer set for k+1mer
116 char libfilename
[256];
118 unsigned int num_ed_temp
= 0; // record the count of the edge
120 int arcfilter
= 0; //arc filter thrd
121 boolean outputContig
= 0;
123 long long pinCounter
; //the count of the merged bubble
124 int clean
; //merge clean bubble
126 unsigned int num_kmer_limit
;