modified: makefile
[GalaxyCodeBases.git] / BGI / SOAPdenovo2 / standardPregraph / cutTip_graph2.c
blob9ab776cc98dc83fdbbc5b2adddca64b7fbe77007
1 /*
2 * cutTip_graph2.c
4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
23 #include "stdinc.h"
24 #include "newhash.h"
25 #include "kmerhash.h"
26 #include "extfunc.h"
27 #include "extvab.h"
30 static int caseA, caseB, caseC, caseD, caseE;
31 void removeDeadArcs2();
33 //Destroy the edge.
34 void destroyEdge2 ( unsigned int edgeid )
36 unsigned int bal_ed = getTwinEdge ( edgeid );
37 ARC * arc;
39 if ( bal_ed == edgeid )
41 edge_array[edgeid].length = 0;
42 return;
45 arc = edge_array[edgeid].arcs;
47 while ( arc )
49 arc->bal_arc->to_ed = 0;
50 arc = arc->next;
53 arc = edge_array[bal_ed].arcs;
55 while ( arc )
57 arc->bal_arc->to_ed = 0;
58 arc = arc->next;
61 edge_array[edgeid].arcs = NULL;
62 edge_array[bal_ed].arcs = NULL;
63 edge_array[edgeid].length = 0;
64 edge_array[bal_ed].length = 0;
65 edge_array[edgeid].deleted = 1;
66 edge_array[bal_ed].deleted = 1;
69 //Count the arc number of edge.
70 ARC * arcCount2 ( unsigned int edgeid, unsigned int * num )
72 ARC * arc;
73 ARC * firstValidArc = NULL;
74 unsigned int count = 0;
75 arc = edge_array[edgeid].arcs;
77 while ( arc )
79 if ( arc->to_ed > 0 )
81 count++;
83 if ( count == 1 )
84 { firstValidArc = arc; }
85 else if ( count > 1 )
87 *num = count;
88 return firstValidArc;
92 arc = arc->next;
95 *num = count;
96 return firstValidArc;
99 /* multiplicity < multiCutoff
100 ==== - ==== - ====
101 length < lenCutoff
104 /*************************************************
105 Function:
106 removeWeakEdges2
107 Description:
108 If the edge is short and its in-degree(or out-degree) is weak, removes it.
109 Input:
110 1. lenCutoff: the cutoff for the length of edge
111 2. multiCutoff: the cutoff for the weight of preArc
112 3. mink: the min k
113 Output:
114 None.
115 Return:
116 None.
117 *************************************************/
118 void removeWeakEdges2 ( int lenCutoff, unsigned int multiCutoff, int mink )
120 unsigned int bal_ed;
121 unsigned int arcRight_n, arcLeft_n;
122 ARC * arcLeft, *arcRight;
123 unsigned int i;
124 int counter = 0;
125 int round = 1;
126 fprintf ( stderr, "Start to destroy weak inner edges.\n" );
127 counter = 1;
129 while ( counter )
131 counter = 0;
133 for ( i = 1; i <= num_ed; i++ )
135 if ( edge_array[i].deleted || edge_array[i].length == 0
136 || edge_array[i].length > lenCutoff
137 || EdSameAsTwin ( i ) || edge_array[i].multi || edge_array[i].cvg == 0 )
138 { continue; }
140 //keep cvg == 1 from original step
141 // if(edge_array[i].cvg == 1)
142 // continue;
143 bal_ed = getTwinEdge ( i );
144 arcRight = arcCount2 ( i, &arcRight_n );
146 if ( arcRight_n > 1 || !arcRight || arcRight->multiplicity > multiCutoff )
147 { continue; }
149 arcLeft = arcCount2 ( bal_ed, &arcLeft_n );
151 if ( arcLeft_n > 1 || !arcLeft || arcLeft->multiplicity > multiCutoff )
152 { continue; }
154 destroyEdge2 ( i );
155 counter++;
158 fprintf ( stderr, "%d weak inner edge(s) destroyed in cycle %d.\n", counter, round++ );
161 removeDeadArcs2();
162 // linearConcatenate();
163 // compactEdgeArray();
167 cvg < covCutoff
169 1. ==== - ==== - ====
171 ====
172 2. ==== - ==== <
173 ====
174 ====
175 3. > ==== - ====
176 ====
178 ==== ====
179 4. > ==== <
180 ==== ====
182 length < lenCutoff
184 /*************************************************
185 Function:
186 removeLowCovEdges2
187 Description:
188 If the edge is short and its coverage is low, removes it.
189 Input:
190 1. lenCutoff: the cutoff for the length of edge
191 2. covCutoff: the cutoff for the coverage of edge
192 3. mink: the min k
193 4. last: whether it's last iteration
194 Output:
195 None.
196 Return:
197 None.
198 *************************************************/
199 void removeLowCovEdges2 ( int lenCutoff, unsigned short covCutoff, int mink, boolean last )
201 unsigned int bal_ed;
202 unsigned int arcRight_n, arcLeft_n;
203 ARC * arcLeft, *arcRight;
204 unsigned int i;
205 int counter = 0;
207 for ( i = 1; i <= num_ed; i++ )
209 if ( edge_array[i].deleted || edge_array[i].cvg == 0
210 || edge_array[i].cvg > covCutoff * 10
211 || edge_array[i].length >= lenCutoff
212 || EdSameAsTwin ( i ) || edge_array[i].length == 0 || edge_array[i].multi )
213 { continue; }
215 //keep cvg == 1 from original SOAPdenovo step
216 // if(edge_array[i].cvg == 1)
217 // continue;
218 bal_ed = getTwinEdge ( i );
219 arcRight = arcCount2 ( i, &arcRight_n );
220 arcLeft = arcCount2 ( bal_ed, &arcLeft_n );
222 if ( arcLeft_n < 1 || arcRight_n < 1 )
223 { continue; }
225 destroyEdge2 ( i );
226 counter++;
229 fprintf ( stderr, "%d inner edge(s) with coverage lower than or equal to %d destroyed.\n", counter, covCutoff );
230 removeDeadArcs2();
231 linearConcatenate2 ( last );
232 compactEdgeArray();
235 /*************************************************
236 Function:
237 isUnreliableTip2
238 Description:
239 Check whether the path from edgeid is a tips.
240 Input:
241 1. edgeid: the edge index
242 2. cutLen: the cutoff length
243 3. strict: whether it's the strict mode
244 Output:
245 None.
246 Return:
247 1 if it's a unreliable tips.
248 *************************************************/
249 boolean isUnreliableTip2 ( unsigned int edgeid, int cutLen, boolean strict )
251 unsigned int arcRight_n, arcLeft_n;
252 unsigned int bal_ed;
253 unsigned int currentEd = edgeid;
254 int length = 0;
255 unsigned int mult = 0;
256 ARC * arc, *activeArc = NULL, *tempArc;
257 unsigned int prevEd = currentEd;
259 if ( edgeid == 0 )
260 { return 0; }
262 bal_ed = getTwinEdge ( edgeid );
264 if ( bal_ed == edgeid )
265 { return 0; }
267 arcCount2 ( bal_ed, &arcLeft_n );
269 if ( arcLeft_n > 0 )
270 { return 0; }
272 while ( currentEd )
274 prevEd = currentEd;
275 arcCount2 ( bal_ed, &arcLeft_n );
276 tempArc = arcCount2 ( currentEd, &arcRight_n );
278 if ( arcLeft_n > 1 || arcRight_n > 1 )
279 { break; }
281 length += edge_array[currentEd].length;
283 if ( tempArc )
285 activeArc = tempArc;
286 currentEd = activeArc->to_ed;
287 bal_ed = getTwinEdge ( currentEd );
289 else
290 { currentEd = 0; }
293 if ( length >= cutLen )
295 return 0;
298 if ( currentEd == 0 )
300 caseB++;
301 return 0;
304 if ( !strict )
306 if ( arcLeft_n < 2 )
307 { length += edge_array[currentEd].length; }
309 if ( length >= cutLen )
310 { return 0; }
311 else
313 caseC++;
314 return 1;
318 if ( arcLeft_n < 2 )
320 return 0;
323 if ( !activeArc )
324 { fprintf ( stderr, "No activeArc while checking edge %d.\n", edgeid ); }
326 if ( activeArc->multiplicity == 1 )
328 caseD++;
329 return 1;
332 for ( arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next )
333 if ( arc->multiplicity > mult )
334 { mult = arc->multiplicity; }
336 if ( mult > activeArc->multiplicity )
337 { caseE++; }
339 return mult > activeArc->multiplicity;
342 boolean isUnreliableTip_strict2 ( unsigned int edgeid, int cutLen )
344 unsigned int arcRight_n, arcLeft_n;
345 unsigned int bal_ed;
346 unsigned int currentEd = edgeid;
347 int length = 0;
348 unsigned int mult = 0;
349 ARC * arc, *activeArc = NULL, *tempArc;
351 if ( edgeid == 0 )
352 { return 0; }
354 bal_ed = getTwinEdge ( edgeid );
356 if ( bal_ed == edgeid )
357 { return 0; }
359 arcCount2 ( bal_ed, &arcLeft_n );
361 if ( arcLeft_n > 0 )
362 { return 0; }
364 while ( currentEd )
366 arcCount2 ( bal_ed, &arcLeft_n );
367 tempArc = arcCount2 ( currentEd, &arcRight_n );
369 if ( arcLeft_n > 1 || arcRight_n > 1 )
371 if ( arcLeft_n == 0 || length == 0 )
372 { return 0; }
373 else
374 { break; }
377 length += edge_array[currentEd].length;
379 if ( length >= cutLen )
380 { return 0; }
382 if ( tempArc )
384 activeArc = tempArc;
385 currentEd = activeArc->to_ed;
386 bal_ed = getTwinEdge ( currentEd );
388 else
389 { currentEd = 0; }
392 if ( currentEd == 0 )
394 caseA++;
395 return 1;
398 if ( !activeArc )
399 { fprintf ( stderr, "No activeArc while checking edge %d.\n", edgeid ); }
401 if ( activeArc->multiplicity == 1 )
403 caseB++;
404 return 1;
407 for ( arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next )
408 if ( arc->multiplicity > mult )
409 { mult = arc->multiplicity; }
411 if ( mult > activeArc->multiplicity )
412 { caseC++; }
414 return mult > activeArc->multiplicity;
417 //Remove the arcs that set to 0.
418 void removeDeadArcs2()
420 unsigned int i, count = 0;
421 ARC * arc, *arc_temp;
423 for ( i = 1; i <= num_ed; i++ )
425 arc = edge_array[i].arcs;
427 while ( arc )
429 arc_temp = arc;
430 arc = arc->next;
432 if ( arc_temp->to_ed == 0 )
434 count++;
435 edge_array[i].arcs = deleteArc ( edge_array[i].arcs, arc_temp );
440 fprintf ( stderr, "%d dead arc(s) removed.\n", count );
443 /*************************************************
444 Function:
445 cutTipsInGraph2
446 Description:
447 Clip the short tips.
448 Input:
449 1. cutLen: the cutoff for the total length of the tips
450 2. strict: the pattern of the cutting tips
451 3. last: whether it's last iterate
452 Output:
453 None.
454 Return:
455 None.
456 *************************************************/
457 void cutTipsInGraph2 ( int cutLen, boolean strict, boolean last )
459 int flag = 1;
460 unsigned int i;
462 if ( !cutLen )
463 { cutLen = 2 * overlaplen; }
465 fprintf ( stderr, "\nStrict: %d, cutoff length: %d.\n", strict, cutLen );
467 if ( strict )
468 { linearConcatenate2 ( last ); }
470 caseA = caseB = caseC = caseD = caseE = 0;
471 int round = 1;
473 while ( flag )
475 flag = 0;
477 for ( i = 1; i <= num_ed; i++ )
479 if ( edge_array[i].deleted )
480 { continue; }
482 if ( !edge_array[i].multi && isUnreliableTip2 ( i, cutLen, strict ) )
484 destroyEdge2 ( i );
485 flag++;
489 fprintf ( stderr, "%d tips cut in cycle %d.\n", flag, round++ );
492 removeDeadArcs2();
494 if ( strict )
495 { fprintf ( stderr, "Case A %d, B %d C %d D %d E %d.\n", caseA, caseB, caseC, caseD, caseE ); }
497 linearConcatenate2 ( last );
498 compactEdgeArray();