4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
29 static int caseA
, caseB
, caseC
, caseD
, caseE
;
31 /*************************************************
35 Removes the edge and its arc.
37 1. edgeid: the edge index.
42 *************************************************/
43 void destroyEdge ( unsigned int edgeid
)
45 unsigned int bal_ed
= getTwinEdge ( edgeid
);
48 if ( bal_ed
== edgeid
)
50 edge_array
[edgeid
].length
= 0;
54 arc
= edge_array
[edgeid
].arcs
;
58 arc
->bal_arc
->to_ed
= 0;
62 arc
= edge_array
[bal_ed
].arcs
;
66 arc
->bal_arc
->to_ed
= 0;
70 edge_array
[edgeid
].arcs
= NULL
;
71 edge_array
[bal_ed
].arcs
= NULL
;
72 edge_array
[edgeid
].length
= 0;
73 edge_array
[bal_ed
].length
= 0;
74 edge_array
[edgeid
].deleted
= 1;
75 edge_array
[bal_ed
].deleted
= 1;
76 //printf("Destroyed %d and %d\n",edgeid,bal_ed);
79 /*************************************************
83 Computes the count of arc of the edge.
85 1. edgeid: the edge index
86 2. num: used to record the count of arc of the edge
90 The first arc of the edge.
91 *************************************************/
92 ARC
* arcCount ( unsigned int edgeid
, unsigned int * num
)
95 ARC
* firstValidArc
= NULL
;
96 unsigned int count
= 0;
97 arc
= edge_array
[edgeid
].arcs
;
101 if ( arc
->to_ed
> 0 )
109 else if ( count
> 1 )
112 return firstValidArc
;
120 return firstValidArc
;
123 /* multiplicity < multiCutoff
127 /*************************************************
131 Checks if the edge is short and its in-degree(or out-degree) is weak.
133 1. lenCutoff: the cutoff for the length of edge
134 2. multiCutoff: the cutoff for the weight of preArc
139 *************************************************/
140 void removeWeakEdges ( int lenCutoff
, unsigned int multiCutoff
)
143 unsigned int arcRight_n
, arcLeft_n
;
144 ARC
* arcLeft
, *arcRight
;
148 fprintf ( stderr
, "Start to destroy weak inner edges.\n" );
155 for ( i
= 1; i
<= num_ed
; i
++ )
157 if ( edge_array
[i
].deleted
|| edge_array
[i
].length
== 0 || edge_array
[i
].length
> lenCutoff
|| EdSameAsTwin ( i
) )
162 bal_ed
= getTwinEdge ( i
);
163 arcRight
= arcCount ( i
, &arcRight_n
);
165 if ( arcRight_n
> 1 || !arcRight
|| arcRight
->multiplicity
> multiCutoff
)
170 arcLeft
= arcCount ( bal_ed
, &arcLeft_n
);
172 if ( arcLeft_n
> 1 || !arcLeft
|| arcLeft
->multiplicity
> multiCutoff
)
181 fprintf ( stderr
, "%d weak inner edge(s) destroyed in cycle %d.\n", counter
, round
++ );
194 1. ==== - ==== - ====
209 /*************************************************
213 Checks if the edge is short and its coverage is low.
215 1. lenCutoff: the cutoff for the length of edge.
216 2. covCutoff: the cutoff for the coverage of edge.
221 *************************************************/
222 void removeLowCovEdges ( int lenCutoff
, unsigned short covCutoff
, boolean last
)
225 unsigned int arcRight_n
, arcLeft_n
;
226 ARC
* arcLeft
, *arcRight
;
230 for ( i
= 1; i
<= num_ed
; i
++ )
232 if ( edge_array
[i
].deleted
|| edge_array
[i
].cvg
== 0 || edge_array
[i
].cvg
> covCutoff
* 10 || edge_array
[i
].length
>= lenCutoff
|| EdSameAsTwin ( i
) || edge_array
[i
].length
== 0 )
237 bal_ed
= getTwinEdge ( i
);
238 arcRight
= arcCount ( i
, &arcRight_n
);
239 arcLeft
= arcCount ( bal_ed
, &arcLeft_n
);
241 if ( arcLeft_n
< 1 || arcRight_n
< 1 )
250 fprintf ( stderr
, "%d inner edge(s) with coverage lower than or equal to %d destroyed.\n", counter
, covCutoff
);
252 linearConcatenate ( 0, last
);
256 /*************************************************
260 Checks if the tips is suitable to cut.
262 1. edgeid: the edge index
263 2. cutLen: the length requirement for tips
264 3. strict: the choice for the pattern of cutting tips
268 0 if the tip couldn't be cut.
269 *************************************************/
270 boolean
isUnreliableTip ( unsigned int edgeid
, int cutLen
, boolean strict
)
272 unsigned int arcRight_n
, arcLeft_n
;
274 unsigned int currentEd
= edgeid
;
276 unsigned int mult
= 0;
277 ARC
* arc
, *activeArc
= NULL
, *tempArc
;
284 bal_ed
= getTwinEdge ( edgeid
);
286 if ( bal_ed
== edgeid
)
291 arcCount ( bal_ed
, &arcLeft_n
);
300 arcCount ( bal_ed
, &arcLeft_n
);
301 tempArc
= arcCount ( currentEd
, &arcRight_n
);
303 if ( arcLeft_n
> 1 || arcRight_n
> 1 )
308 length
+= edge_array
[currentEd
].length
;
313 currentEd
= activeArc
->to_ed
;
314 bal_ed
= getTwinEdge ( currentEd
);
322 if ( length
>= cutLen
)
327 if ( currentEd
== 0 )
337 length
+= edge_array
[currentEd
].length
;
340 if ( length
>= cutLen
)
358 fprintf ( stderr
, "No activeArc while checking edge %d.\n", edgeid
);
361 if ( activeArc
->multiplicity
== 1 )
367 for ( arc
= edge_array
[bal_ed
].arcs
; arc
!= NULL
; arc
= arc
->next
)
368 if ( arc
->multiplicity
> mult
)
370 mult
= arc
->multiplicity
;
373 if ( mult
> activeArc
->multiplicity
)
378 return mult
> activeArc
->multiplicity
;
381 boolean
isUnreliableTip_strict ( unsigned int edgeid
, int cutLen
)
383 unsigned int arcRight_n
, arcLeft_n
;
385 unsigned int currentEd
= edgeid
;
387 unsigned int mult
= 0;
388 ARC
* arc
, *activeArc
= NULL
, *tempArc
;
395 bal_ed
= getTwinEdge ( edgeid
);
397 if ( bal_ed
== edgeid
)
402 arcCount ( bal_ed
, &arcLeft_n
);
411 arcCount ( bal_ed
, &arcLeft_n
);
412 tempArc
= arcCount ( currentEd
, &arcRight_n
);
414 if ( arcLeft_n
> 1 || arcRight_n
> 1 )
416 if ( arcLeft_n
== 0 || length
== 0 )
426 length
+= edge_array
[currentEd
].length
;
428 if ( length
>= cutLen
)
436 currentEd
= activeArc
->to_ed
;
437 bal_ed
= getTwinEdge ( currentEd
);
445 if ( currentEd
== 0 )
453 fprintf ( stderr
, "No activeArc while checking edge %d.\n", edgeid
);
456 if ( activeArc
->multiplicity
== 1 )
462 for ( arc
= edge_array
[bal_ed
].arcs
; arc
!= NULL
; arc
= arc
->next
)
463 if ( arc
->multiplicity
> mult
)
465 mult
= arc
->multiplicity
;
468 if ( mult
> activeArc
->multiplicity
)
473 return mult
> activeArc
->multiplicity
;
476 /*************************************************
480 Removes unvalid arcs.
487 *************************************************/
488 void removeDeadArcs ()
490 unsigned int i
, count
= 0;
491 ARC
* arc
, *arc_temp
;
493 for ( i
= 1; i
<= num_ed
; i
++ )
495 arc
= edge_array
[i
].arcs
;
502 if ( arc_temp
->to_ed
== 0 )
505 edge_array
[i
].arcs
= deleteArc ( edge_array
[i
].arcs
, arc_temp
);
510 fprintf ( stderr
, "%d dead arc(s) removed.\n", count
);
513 /*************************************************
519 1. cutLen: the cutoff for the total length of the tips
520 2. strict: the pattern of the cutting tips
525 *************************************************/
526 void cutTipsInGraph ( int cutLen
, boolean strict
, boolean last
)
533 cutLen
= 2 * overlaplen
;
536 fprintf ( stderr
, "\nStrict: %d, cutoff length: %d.\n", strict
, cutLen
);
540 linearConcatenate ( 0, last
);
543 caseA
= caseB
= caseC
= caseD
= caseE
= 0;
550 for ( i
= 1; i
<= num_ed
; i
++ )
552 if ( edge_array
[i
].deleted
)
557 if ( isUnreliableTip ( i
, cutLen
, strict
) )
564 fprintf ( stderr
, "%d tips cut in cycle %d.\n", flag
, round
++ );
571 fprintf ( stderr
, "Case A %d, B %d C %d D %d E %d.\n", caseA
, caseB
, caseC
, caseD
, caseE
);
574 linearConcatenate ( 0, last
);