4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
30 static int caseA
, caseB
, caseC
, caseD
, caseE
;
31 void removeDeadArcs2();
34 void destroyEdge2 ( unsigned int edgeid
)
36 unsigned int bal_ed
= getTwinEdge ( edgeid
);
39 if ( bal_ed
== edgeid
)
41 edge_array
[edgeid
].length
= 0;
45 arc
= edge_array
[edgeid
].arcs
;
49 arc
->bal_arc
->to_ed
= 0;
53 arc
= edge_array
[bal_ed
].arcs
;
57 arc
->bal_arc
->to_ed
= 0;
61 edge_array
[edgeid
].arcs
= NULL
;
62 edge_array
[bal_ed
].arcs
= NULL
;
63 edge_array
[edgeid
].length
= 0;
64 edge_array
[bal_ed
].length
= 0;
65 edge_array
[edgeid
].deleted
= 1;
66 edge_array
[bal_ed
].deleted
= 1;
69 //Count the arc number of edge.
70 ARC
* arcCount2 ( unsigned int edgeid
, unsigned int * num
)
73 ARC
* firstValidArc
= NULL
;
74 unsigned int count
= 0;
75 arc
= edge_array
[edgeid
].arcs
;
84 { firstValidArc
= arc
; }
99 /* multiplicity < multiCutoff
104 /*************************************************
108 If the edge is short and its in-degree(or out-degree) is weak, removes it.
110 1. lenCutoff: the cutoff for the length of edge
111 2. multiCutoff: the cutoff for the weight of preArc
117 *************************************************/
118 void removeWeakEdges2 ( int lenCutoff
, unsigned int multiCutoff
, int mink
)
121 unsigned int arcRight_n
, arcLeft_n
;
122 ARC
* arcLeft
, *arcRight
;
126 fprintf ( stderr
, "Start to destroy weak inner edges.\n" );
133 for ( i
= 1; i
<= num_ed
; i
++ )
135 if ( edge_array
[i
].deleted
|| edge_array
[i
].length
== 0
136 || edge_array
[i
].length
> lenCutoff
137 || EdSameAsTwin ( i
) || edge_array
[i
].multi
|| edge_array
[i
].cvg
== 0 )
140 //keep cvg == 1 from original step
141 // if(edge_array[i].cvg == 1)
143 bal_ed
= getTwinEdge ( i
);
144 arcRight
= arcCount2 ( i
, &arcRight_n
);
146 if ( arcRight_n
> 1 || !arcRight
|| arcRight
->multiplicity
> multiCutoff
)
149 arcLeft
= arcCount2 ( bal_ed
, &arcLeft_n
);
151 if ( arcLeft_n
> 1 || !arcLeft
|| arcLeft
->multiplicity
> multiCutoff
)
158 fprintf ( stderr
, "%d weak inner edge(s) destroyed in cycle %d.\n", counter
, round
++ );
162 // linearConcatenate();
163 // compactEdgeArray();
169 1. ==== - ==== - ====
184 /*************************************************
188 If the edge is short and its coverage is low, removes it.
190 1. lenCutoff: the cutoff for the length of edge
191 2. covCutoff: the cutoff for the coverage of edge
193 4. last: whether it's last iteration
198 *************************************************/
199 void removeLowCovEdges2 ( int lenCutoff
, unsigned short covCutoff
, int mink
, boolean last
)
202 unsigned int arcRight_n
, arcLeft_n
;
203 ARC
* arcLeft
, *arcRight
;
207 for ( i
= 1; i
<= num_ed
; i
++ )
209 if ( edge_array
[i
].deleted
|| edge_array
[i
].cvg
== 0
210 || edge_array
[i
].cvg
> covCutoff
* 10
211 || edge_array
[i
].length
>= lenCutoff
212 || EdSameAsTwin ( i
) || edge_array
[i
].length
== 0 || edge_array
[i
].multi
)
215 //keep cvg == 1 from original SOAPdenovo step
216 // if(edge_array[i].cvg == 1)
218 bal_ed
= getTwinEdge ( i
);
219 arcRight
= arcCount2 ( i
, &arcRight_n
);
220 arcLeft
= arcCount2 ( bal_ed
, &arcLeft_n
);
222 if ( arcLeft_n
< 1 || arcRight_n
< 1 )
229 fprintf ( stderr
, "%d inner edge(s) with coverage lower than or equal to %d destroyed.\n", counter
, covCutoff
);
231 linearConcatenate2 ( last
);
235 /*************************************************
239 Check whether the path from edgeid is a tips.
241 1. edgeid: the edge index
242 2. cutLen: the cutoff length
243 3. strict: whether it's the strict mode
247 1 if it's a unreliable tips.
248 *************************************************/
249 boolean
isUnreliableTip2 ( unsigned int edgeid
, int cutLen
, boolean strict
)
251 unsigned int arcRight_n
, arcLeft_n
;
253 unsigned int currentEd
= edgeid
;
255 unsigned int mult
= 0;
256 ARC
* arc
, *activeArc
= NULL
, *tempArc
;
257 unsigned int prevEd
= currentEd
;
262 bal_ed
= getTwinEdge ( edgeid
);
264 if ( bal_ed
== edgeid
)
267 arcCount2 ( bal_ed
, &arcLeft_n
);
275 arcCount2 ( bal_ed
, &arcLeft_n
);
276 tempArc
= arcCount2 ( currentEd
, &arcRight_n
);
278 if ( arcLeft_n
> 1 || arcRight_n
> 1 )
281 length
+= edge_array
[currentEd
].length
;
286 currentEd
= activeArc
->to_ed
;
287 bal_ed
= getTwinEdge ( currentEd
);
293 if ( length
>= cutLen
)
298 if ( currentEd
== 0 )
307 { length
+= edge_array
[currentEd
].length
; }
309 if ( length
>= cutLen
)
324 { fprintf ( stderr
, "No activeArc while checking edge %d.\n", edgeid
); }
326 if ( activeArc
->multiplicity
== 1 )
332 for ( arc
= edge_array
[bal_ed
].arcs
; arc
!= NULL
; arc
= arc
->next
)
333 if ( arc
->multiplicity
> mult
)
334 { mult
= arc
->multiplicity
; }
336 if ( mult
> activeArc
->multiplicity
)
339 return mult
> activeArc
->multiplicity
;
342 boolean
isUnreliableTip_strict2 ( unsigned int edgeid
, int cutLen
)
344 unsigned int arcRight_n
, arcLeft_n
;
346 unsigned int currentEd
= edgeid
;
348 unsigned int mult
= 0;
349 ARC
* arc
, *activeArc
= NULL
, *tempArc
;
354 bal_ed
= getTwinEdge ( edgeid
);
356 if ( bal_ed
== edgeid
)
359 arcCount2 ( bal_ed
, &arcLeft_n
);
366 arcCount2 ( bal_ed
, &arcLeft_n
);
367 tempArc
= arcCount2 ( currentEd
, &arcRight_n
);
369 if ( arcLeft_n
> 1 || arcRight_n
> 1 )
371 if ( arcLeft_n
== 0 || length
== 0 )
377 length
+= edge_array
[currentEd
].length
;
379 if ( length
>= cutLen
)
385 currentEd
= activeArc
->to_ed
;
386 bal_ed
= getTwinEdge ( currentEd
);
392 if ( currentEd
== 0 )
399 { fprintf ( stderr
, "No activeArc while checking edge %d.\n", edgeid
); }
401 if ( activeArc
->multiplicity
== 1 )
407 for ( arc
= edge_array
[bal_ed
].arcs
; arc
!= NULL
; arc
= arc
->next
)
408 if ( arc
->multiplicity
> mult
)
409 { mult
= arc
->multiplicity
; }
411 if ( mult
> activeArc
->multiplicity
)
414 return mult
> activeArc
->multiplicity
;
417 //Remove the arcs that set to 0.
418 void removeDeadArcs2()
420 unsigned int i
, count
= 0;
421 ARC
* arc
, *arc_temp
;
423 for ( i
= 1; i
<= num_ed
; i
++ )
425 arc
= edge_array
[i
].arcs
;
432 if ( arc_temp
->to_ed
== 0 )
435 edge_array
[i
].arcs
= deleteArc ( edge_array
[i
].arcs
, arc_temp
);
440 fprintf ( stderr
, "%d dead arc(s) removed.\n", count
);
443 /*************************************************
449 1. cutLen: the cutoff for the total length of the tips
450 2. strict: the pattern of the cutting tips
451 3. last: whether it's last iterate
456 *************************************************/
457 void cutTipsInGraph2 ( int cutLen
, boolean strict
, boolean last
)
463 { cutLen
= 2 * overlaplen
; }
465 fprintf ( stderr
, "\nStrict: %d, cutoff length: %d.\n", strict
, cutLen
);
468 { linearConcatenate2 ( last
); }
470 caseA
= caseB
= caseC
= caseD
= caseE
= 0;
477 for ( i
= 1; i
<= num_ed
; i
++ )
479 if ( edge_array
[i
].deleted
)
482 if ( !edge_array
[i
].multi
&& isUnreliableTip2 ( i
, cutLen
, strict
) )
489 fprintf ( stderr
, "%d tips cut in cycle %d.\n", flag
, round
++ );
495 { fprintf ( stderr
, "Case A %d, B %d C %d D %d E %d.\n", caseA
, caseB
, caseC
, caseD
, caseE
); }
497 linearConcatenate2 ( last
);