modified: makefile
[GalaxyCodeBases.git] / BGI / SOAPdenovo2 / standardPregraph / cutTip_graph.c
blob651c1f88fd29fac88dfa48ddd17fb09c088ecc28
1 /*
2 * cutTip_graph.c
4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
23 #include "stdinc.h"
24 #include "newhash.h"
25 #include "kmerhash.h"
26 #include "extfunc.h"
27 #include "extvab.h"
29 static int caseA, caseB, caseC, caseD, caseE;
31 /*************************************************
32 Function:
33 destroyEdge
34 Description:
35 Removes the edge and its arc.
36 Input:
37 1. edgeid: the edge index.
38 Output:
39 None.
40 Return:
41 None.
42 *************************************************/
43 void destroyEdge ( unsigned int edgeid )
45 unsigned int bal_ed = getTwinEdge ( edgeid );
46 ARC * arc;
48 if ( bal_ed == edgeid )
50 edge_array[edgeid].length = 0;
51 return;
54 arc = edge_array[edgeid].arcs;
56 while ( arc )
58 arc->bal_arc->to_ed = 0;
59 arc = arc->next;
62 arc = edge_array[bal_ed].arcs;
64 while ( arc )
66 arc->bal_arc->to_ed = 0;
67 arc = arc->next;
70 edge_array[edgeid].arcs = NULL;
71 edge_array[bal_ed].arcs = NULL;
72 edge_array[edgeid].length = 0;
73 edge_array[bal_ed].length = 0;
74 edge_array[edgeid].deleted = 1;
75 edge_array[bal_ed].deleted = 1;
76 //printf("Destroyed %d and %d\n",edgeid,bal_ed);
79 /*************************************************
80 Function:
81 arcCount
82 Description:
83 Computes the count of arc of the edge.
84 Input:
85 1. edgeid: the edge index
86 2. num: used to record the count of arc of the edge
87 Output:
88 None.
89 Return:
90 The first arc of the edge.
91 *************************************************/
92 ARC * arcCount ( unsigned int edgeid, unsigned int * num )
94 ARC * arc;
95 ARC * firstValidArc = NULL;
96 unsigned int count = 0;
97 arc = edge_array[edgeid].arcs;
99 while ( arc )
101 if ( arc->to_ed > 0 )
103 count++;
105 if ( count == 1 )
107 firstValidArc = arc;
109 else if ( count > 1 )
111 *num = count;
112 return firstValidArc;
116 arc = arc->next;
119 *num = count;
120 return firstValidArc;
123 /* multiplicity < multiCutoff
124 ==== - ==== - ====
125 length < lenCutoff
127 /*************************************************
128 Function:
129 removeWeakEdges
130 Description:
131 Checks if the edge is short and its in-degree(or out-degree) is weak.
132 Input:
133 1. lenCutoff: the cutoff for the length of edge
134 2. multiCutoff: the cutoff for the weight of preArc
135 Output:
136 None.
137 Return:
138 None.
139 *************************************************/
140 void removeWeakEdges ( int lenCutoff, unsigned int multiCutoff )
142 unsigned int bal_ed;
143 unsigned int arcRight_n, arcLeft_n;
144 ARC * arcLeft, *arcRight;
145 unsigned int i;
146 int counter = 0;
147 int round = 1;
148 fprintf ( stderr, "Start to destroy weak inner edges.\n" );
149 counter = 1;
151 while ( counter )
153 counter = 0;
155 for ( i = 1; i <= num_ed; i++ )
157 if ( edge_array[i].deleted || edge_array[i].length == 0 || edge_array[i].length > lenCutoff || EdSameAsTwin ( i ) )
159 continue;
162 bal_ed = getTwinEdge ( i );
163 arcRight = arcCount ( i, &arcRight_n );
165 if ( arcRight_n > 1 || !arcRight || arcRight->multiplicity > multiCutoff )
167 continue;
170 arcLeft = arcCount ( bal_ed, &arcLeft_n );
172 if ( arcLeft_n > 1 || !arcLeft || arcLeft->multiplicity > multiCutoff )
174 continue;
177 destroyEdge ( i );
178 counter++;
181 fprintf ( stderr, "%d weak inner edge(s) destroyed in cycle %d.\n", counter, round++ );
184 removeDeadArcs ();
186 linearConcatenate();
187 compactEdgeArray();
192 cvg < covCutoff
194 1. ==== - ==== - ====
196 ====
197 2. ==== - ==== <
198 ====
199 ====
200 3. > ==== - ====
201 ====
203 ==== ====
204 4. > ==== <
205 ==== ====
207 length < lenCutoff
209 /*************************************************
210 Function:
211 removeLowCovEdges
212 Description:
213 Checks if the edge is short and its coverage is low.
214 Input:
215 1. lenCutoff: the cutoff for the length of edge.
216 2. covCutoff: the cutoff for the coverage of edge.
217 Output:
218 None.
219 Return:
220 None.
221 *************************************************/
222 void removeLowCovEdges ( int lenCutoff, unsigned short covCutoff, boolean last )
224 unsigned int bal_ed;
225 unsigned int arcRight_n, arcLeft_n;
226 ARC * arcLeft, *arcRight;
227 unsigned int i;
228 int counter = 0;
230 for ( i = 1; i <= num_ed; i++ )
232 if ( edge_array[i].deleted || edge_array[i].cvg == 0 || edge_array[i].cvg > covCutoff * 10 || edge_array[i].length >= lenCutoff || EdSameAsTwin ( i ) || edge_array[i].length == 0 )
234 continue;
237 bal_ed = getTwinEdge ( i );
238 arcRight = arcCount ( i, &arcRight_n );
239 arcLeft = arcCount ( bal_ed, &arcLeft_n );
241 if ( arcLeft_n < 1 || arcRight_n < 1 )
243 continue;
246 destroyEdge ( i );
247 counter++;
250 fprintf ( stderr, "%d inner edge(s) with coverage lower than or equal to %d destroyed.\n", counter, covCutoff );
251 removeDeadArcs ();
252 linearConcatenate ( 0, last );
253 compactEdgeArray ();
256 /*************************************************
257 Function:
258 isUnreliableTip
259 Description:
260 Checks if the tips is suitable to cut.
261 Input:
262 1. edgeid: the edge index
263 2. cutLen: the length requirement for tips
264 3. strict: the choice for the pattern of cutting tips
265 Output:
266 None.
267 Return:
268 0 if the tip couldn't be cut.
269 *************************************************/
270 boolean isUnreliableTip ( unsigned int edgeid, int cutLen, boolean strict )
272 unsigned int arcRight_n, arcLeft_n;
273 unsigned int bal_ed;
274 unsigned int currentEd = edgeid;
275 int length = 0;
276 unsigned int mult = 0;
277 ARC * arc, *activeArc = NULL, *tempArc;
279 if ( edgeid == 0 )
281 return 0;
284 bal_ed = getTwinEdge ( edgeid );
286 if ( bal_ed == edgeid )
288 return 0;
291 arcCount ( bal_ed, &arcLeft_n );
293 if ( arcLeft_n > 0 )
295 return 0;
298 while ( currentEd )
300 arcCount ( bal_ed, &arcLeft_n );
301 tempArc = arcCount ( currentEd, &arcRight_n );
303 if ( arcLeft_n > 1 || arcRight_n > 1 )
305 break;
308 length += edge_array[currentEd].length;
310 if ( tempArc )
312 activeArc = tempArc;
313 currentEd = activeArc->to_ed;
314 bal_ed = getTwinEdge ( currentEd );
316 else
318 currentEd = 0;
322 if ( length >= cutLen )
324 return 0;
327 if ( currentEd == 0 )
329 caseB++;
330 return 1;
333 if ( !strict )
335 if ( arcLeft_n < 2 )
337 length += edge_array[currentEd].length;
340 if ( length >= cutLen )
342 return 0;
344 else
346 caseC++;
347 return 1;
351 if ( arcLeft_n < 2 )
353 return 0;
356 if ( !activeArc )
358 fprintf ( stderr, "No activeArc while checking edge %d.\n", edgeid );
361 if ( activeArc->multiplicity == 1 )
363 caseD++;
364 return 1;
367 for ( arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next )
368 if ( arc->multiplicity > mult )
370 mult = arc->multiplicity;
373 if ( mult > activeArc->multiplicity )
375 caseE++;
378 return mult > activeArc->multiplicity;
381 boolean isUnreliableTip_strict ( unsigned int edgeid, int cutLen )
383 unsigned int arcRight_n, arcLeft_n;
384 unsigned int bal_ed;
385 unsigned int currentEd = edgeid;
386 int length = 0;
387 unsigned int mult = 0;
388 ARC * arc, *activeArc = NULL, *tempArc;
390 if ( edgeid == 0 )
392 return 0;
395 bal_ed = getTwinEdge ( edgeid );
397 if ( bal_ed == edgeid )
399 return 0;
402 arcCount ( bal_ed, &arcLeft_n );
404 if ( arcLeft_n > 0 )
406 return 0;
409 while ( currentEd )
411 arcCount ( bal_ed, &arcLeft_n );
412 tempArc = arcCount ( currentEd, &arcRight_n );
414 if ( arcLeft_n > 1 || arcRight_n > 1 )
416 if ( arcLeft_n == 0 || length == 0 )
418 return 0;
420 else
422 break;
426 length += edge_array[currentEd].length;
428 if ( length >= cutLen )
430 return 0;
433 if ( tempArc )
435 activeArc = tempArc;
436 currentEd = activeArc->to_ed;
437 bal_ed = getTwinEdge ( currentEd );
439 else
441 currentEd = 0;
445 if ( currentEd == 0 )
447 caseA++;
448 return 1;
451 if ( !activeArc )
453 fprintf ( stderr, "No activeArc while checking edge %d.\n", edgeid );
456 if ( activeArc->multiplicity == 1 )
458 caseB++;
459 return 1;
462 for ( arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next )
463 if ( arc->multiplicity > mult )
465 mult = arc->multiplicity;
468 if ( mult > activeArc->multiplicity )
470 caseC++;
473 return mult > activeArc->multiplicity;
476 /*************************************************
477 Function:
478 removeDeadArcs
479 Description:
480 Removes unvalid arcs.
481 Input:
482 None.
483 Output:
484 None.
485 Return:
486 None.
487 *************************************************/
488 void removeDeadArcs ()
490 unsigned int i, count = 0;
491 ARC * arc, *arc_temp;
493 for ( i = 1; i <= num_ed; i++ )
495 arc = edge_array[i].arcs;
497 while ( arc )
499 arc_temp = arc;
500 arc = arc->next;
502 if ( arc_temp->to_ed == 0 )
504 count++;
505 edge_array[i].arcs = deleteArc ( edge_array[i].arcs, arc_temp );
510 fprintf ( stderr, "%d dead arc(s) removed.\n", count );
513 /*************************************************
514 Function:
515 cutTipsInGraph
516 Description:
517 Cuts the short tips.
518 Input:
519 1. cutLen: the cutoff for the total length of the tips
520 2. strict: the pattern of the cutting tips
521 Output:
522 None.
523 Return:
524 None.
525 *************************************************/
526 void cutTipsInGraph ( int cutLen, boolean strict, boolean last )
528 int flag = 1;
529 unsigned int i;
531 if ( !cutLen )
533 cutLen = 2 * overlaplen;
536 fprintf ( stderr, "\nStrict: %d, cutoff length: %d.\n", strict, cutLen );
538 if ( strict )
540 linearConcatenate ( 0, last );
543 caseA = caseB = caseC = caseD = caseE = 0;
544 int round = 1;
546 while ( flag )
548 flag = 0;
550 for ( i = 1; i <= num_ed; i++ )
552 if ( edge_array[i].deleted )
554 continue;
557 if ( isUnreliableTip ( i, cutLen, strict ) )
559 destroyEdge ( i );
560 flag++;
564 fprintf ( stderr, "%d tips cut in cycle %d.\n", flag, round++ );
567 removeDeadArcs ();
569 if ( strict )
571 fprintf ( stderr, "Case A %d, B %d C %d D %d E %d.\n", caseA, caseB, caseC, caseD, caseE );
574 linearConcatenate ( 0, last );
575 compactEdgeArray ();