4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
28 static void initenv ( int argc
, char ** argv
);
29 static void display_contig_usage ();
30 char shortrdsfile
[256], graphfile
[256];
31 static boolean repeatSolve
; //whether solve repeat or not
32 //static boolean keepReadFile = 0; //whether keep tmp selected reads file or not
33 static boolean iter
= 0; //whether use multikmer or not
34 static boolean cleanBubble
= 0; //whether merge clean bubble before iterate
35 static int M
= 1; //merge bubble level
36 static int maxk
= 0; //max kmer of multikmer
38 /*************************************************
42 The main function for contig step . its processes are as below:
44 2. Merge bubble(clean bubble is optional for multikmer)
45 3. Remove weak edge and low coverage edge
47 5. Iterate multikmer(optional)
49 @see display_contig_usage ()
57 6. *.preGraphBasic [optional]
60 *************************************************/
61 int call_heavygraph ( int argc
, char ** argv
)
63 time_t start_t
, stop_t
, time_bef
, time_aft
;
66 fprintf ( stderr
, "\n********************\n" );
67 fprintf ( stderr
, "Contig\n" );
68 fprintf ( stderr
, "********************\n\n" );
69 initenv ( argc
, argv
);
70 loadVertex ( graphfile
);
71 loadEdge ( graphfile
);
75 ret
= loadPathBin ( graphfile
);
86 // ret = loadPathBin (graphfile);
93 fprintf ( stderr
, "Repeat solving can't be done...\n" );
97 fprintf ( stderr
, "Time spent on solving repeat: %ds.\n", ( int ) ( time_aft
- time_bef
) );
100 //edgecvg_bar(edge_array,num_ed,graphfile,100);
102 if ( !iter
&& M
> 0 )
105 bubblePinch ( 0.90, graphfile
, M
, 0, 1 );
107 fprintf ( stderr
, "Time spent on pinching bubbles: %ds.\n", ( int ) ( time_aft
- time_bef
) );
110 if ( iter
&& cleanBubble
&& M
> 0 )
114 long long oldpinCounter
= 0;
122 if ( times
>= 4 ) { break; }
124 bubblePinch ( 0.90, graphfile
, M
, 1, 0 );
126 fprintf ( stderr
, "%lld clean bubbles merged.\n", pinCounter
);
130 fprintf ( stderr
, "Time spent on pinching clean bubbles: %ds.\n", ( int ) ( time_aft
- time_bef
) );
136 removeWeakEdges ( 2 * overlaplen
, 1 );
137 removeLowCovEdges ( 2 * overlaplen
, deLowEdge
, !iter
);
140 cutTipsInGraph ( 0, 0, !iter
);
144 Iterate ( shortrdsfile
, graphfile
, maxk
, M
); //keepReadFile,
149 bubblePinch ( 0.90, graphfile
, M
, 1, 0 );
151 fprintf ( stderr
, "Time spent on pinching bubbles: %ds.\n", ( int ) ( time_aft
- time_bef
) );
154 freshpreGraphBasic ( iter
, maxk
, graphfile
);
157 //output_graph(graphfile);
158 output_contig ( edge_array
, num_ed
, graphfile
, overlaplen
+ 1 );
159 output_updated_edges ( graphfile
);
160 output_heavyArcs ( graphfile
);
164 free ( ( void * ) vt_array
);
170 free_edge_array ( edge_array
, num_ed_limit
);
176 fprintf ( stderr
, "\nTime spent on constructing contig: %dm.\n\n", ( int ) ( stop_t
- start_t
) / 60 );
181 /*****************************************************************************
182 * Parse command line switches
183 *****************************************************************************/
184 void initenv ( int argc
, char ** argv
)
188 extern char * optarg
;
190 inpseq
= outseq
= repeatSolve
= iter
= cleanBubble
= 0;//keepReadFile =
192 fprintf ( stderr
, "Parameters: contig " );
194 while ( ( copt
= getopt ( argc
, argv
, "g:M:D:Rs:m:p:e:E" ) ) != EOF
) // r
199 fprintf ( stderr
, "-M %s ", optarg
);
200 sscanf ( optarg
, "%s", temp
);
204 fprintf ( stderr
, "-D %s ", optarg
);
205 sscanf ( optarg
, "%s", temp
);
206 deLowEdge
= atoi ( temp
) >= 0 ? atoi ( temp
) : 0;
209 fprintf ( stderr
, "-g %s ", optarg
);
211 sscanf ( optarg
, "%s", graphfile
);
215 fprintf ( stderr
, "-R " );
218 fprintf ( stderr
, "-s %s ", optarg
);
220 sscanf ( optarg
, "%s", shortrdsfile
);
223 fprintf ( stderr
, "-m %s ", optarg
);
225 sscanf ( optarg
, "%s", temp
);
226 maxk
= atoi ( temp
);
231 fprintf(stderr, "-r ");
235 fprintf ( stderr
, "-e %s ", optarg
);
236 sscanf ( optarg
, "%s", temp
);
237 arcfilter
= atoi ( temp
);
240 fprintf ( stderr
, "-p %s ", optarg
);
241 sscanf ( optarg
, "%s", temp
);
242 thrd_num
= atoi ( temp
);
246 fprintf ( stderr
, "-E " );
250 if ( ( iter
&& inpseq
== 0 ) || inGraph
== 0 )
252 display_contig_usage ();
258 fprintf ( stderr
, "\n\n" );
265 fprintf ( stderr
, "Max K should be an odd number, change to %d.\n", maxk
);
271 fprintf ( stderr
, "Max K should not be less than 13, change to %d.\n", maxk
);
275 else if ( maxk
> 127 )
278 fprintf ( stderr
, "Max K should not be greater than 127, change to %d.\n", maxk
);
282 else if ( maxk
> 63 )
285 fprintf ( stderr
, "Max K should not be greater than 63, change to %d.\n", maxk
);
290 if ( maxk
<= overlaplen
)
292 fprintf ( stderr
, "Max K %d is not greater than overlaplen %d.\n", maxk
, overlaplen
);
293 display_contig_usage ();
298 if ( ( iter
&& inpseq
== 0 ) || inGraph
== 0 )
300 display_contig_usage ();
305 static void display_contig_usage ()
307 fprintf ( stderr
, "\ncontig -g InputGraph [-R] [-M mergeLevel -D EdgeCovCutoff] [-s readsInfoFile -m maxkmer -p n_cpu -r]\n" );
308 fprintf ( stderr
, " -g <string> inputGraph: prefix of input graph file names\n" );
309 fprintf ( stderr
, " -R (optional) resolve repeats using information generated in pregraph step, works only if -R is set in pregraph step too, [NO]\n" );
310 fprintf ( stderr
, " -M <int> mergeLevel(min 0, max 3): the strength of merging similar sequences during contiging, [1]\n" );
311 fprintf ( stderr
, " -D <int> EdgeCovCutoff: edges shorter than (2*K+1) with coverage no larger than EdgeCovCutoff will be deleted, [1]\n" );
312 fprintf ( stderr
, " -e <int> arcWeight: two edges, between which the arc's weight is larger than arcWeight, will be linerized, [0]\n" );
313 fprintf ( stderr
, " -m <int> max k when using multi-kmer, and the parameters below are used along with multi-kmer, [NO]\n" );
314 fprintf ( stderr
, " -s <string> readsInfoFile:The file contains information of solexa reads(It's necessary when using multi-kmer)\n" );
315 fprintf ( stderr
, " -p <int> number of cpu, [8]\n" );
316 fprintf ( stderr
, " -E (optional) merge clean bubble before iterate, works only if -M is set when using multi-kmer, [NO]\n" );
317 // fprintf (stderr," -r (optional) keep available read(*.read)\n");