1 #######################
8 #######################
12 a)fix bug for map, zombie process cause by fclose
14 1) readseq1by1.c::openFile4read()
17 signal(SIGCHLD,SIG_IGN);
19 signal(SIGCLD,SIG_IGN);
22 || ((lib_array[i].curr_type == 1) && feof (lib_array[i].fp1) && feof (lib_array[i].fp2))
23 || ((lib_array[i].curr_type != 1) && (lib_array[i].curr_type != 4) && feof (lib_array[i].fp1)))
25 || ((lib_array[i].curr_type == 1 || lib_array[i].curr_type == 2) && feof (lib_array[i].fp1) && feof (lib_array[i].fp2))
26 || ((lib_array[i].curr_type != 1 && lib_array[i].curr_type != 2) && (lib_array[i].curr_type != 4) && feof (lib_array[i].fp1)))
30 a) fix bug for map, zombie process cause by fclose
32 1) readseq1by1.c::openFile4read()
34 sprintf (cmd, "gzip -dc %s", fname);
35 fp = popen (cmd, "r");
39 sprintf (cmd, "gzip -dc %s", fname);
40 fp = popen (cmd, "r");
43 signal(SIGCHLD,SIG_IGN);
45 signal(SIGCLD,SIG_IGN);
53 a) fix bugs for output, delete the empty line of the output(*.edge.gz, *.contig, *.scafSeq)
55 1) concatenateEdge.c::printEdgeSeq()
57 if ((i + overlaplen + 1) % 100 == 0)
59 if ((i + overlaplen + 1) % 100 == 0 && i < len - 1)
60 2) iterate.c::output_1contig()
62 if ((i + overlaplen + 1) % 100 == 0)
64 if ((i + overlaplen + 1) % 100 == 0 && i < edge->length - 1)
65 3) output_contig.c::output_1contig()
67 if ((i + overlaplen + 1) % 100 == 0)
69 if ((i + overlaplen + 1) % 100 == 0 && i < edge->length - 1)
70 4) output_pregraph.c::output_1edge()
72 if ((i + 1) % 100 == 0)
74 if ((i + 1) % 100 == 0 && i < edge->length - 1)
75 5) seq.c::printTightString()
77 if ((i + 1) % 100 == 0)
79 if ((i + 1) % 100 == 0 && i < len - 1)
81 a)outputTightStr2Visual()
82 if ((++column) % 100 == 0)
84 if ((++column) % 100 == 0 && i < end - 1)
86 if ((++column) % 100 == 0)
88 if ((++column) % 100 == 0 && i < length - 1 - start)
90 b)outputTightStrLowerCase2Visual()
91 if ((++column) % 100 == 0)
93 if ((++column) % 100 == 0 && i < end - 1)
101 if(i != 0 && i % 100 == 0) fprintf(fo3, "\n");
103 if(i != 0 && i % 100 == 0 && i < ctg_start_pos - 1) fprintf(fo3, "\n");
105 if(i != 0 && i % 100 == 0) fprintf(fo3, "\n");
107 if(i != 0 && i % 100 == 0 && i < ctg_start_pos - 1) fprintf(fo3, "\n");
118 if(column % 100 != 0)
123 a) fix bugs for removeWeakEdges
125 1) cutTip_graph.c::removeWeakEdges()
127 for (i = 1; i <= num_ed; i++)
129 if (edge_array[i].deleted || edge_array[i].length == 0 || edge_array[i].length > lenCutoff || EdSameAsTwin (i))
134 bal_ed = getTwinEdge (i);
135 arcRight = arcCount (i, &arcRight_n);
137 if (arcRight_n > 1 || !arcRight || arcRight->multiplicity > multiCutoff)
142 arcLeft = arcCount (bal_ed, &arcLeft_n);
144 if (arcLeft_n > 1 || !arcLeft || arcLeft->multiplicity > multiCutoff)
157 for (i = 1; i <= num_ed; i++)
159 if (edge_array[i].deleted || edge_array[i].length == 0 || edge_array[i].length > lenCutoff || EdSameAsTwin (i))
164 bal_ed = getTwinEdge (i);
165 arcRight = arcCount (i, &arcRight_n);
167 if (arcRight_n > 1 || !arcRight || arcRight->multiplicity > multiCutoff)
172 arcLeft = arcCount (bal_ed, &arcLeft_n);
174 if (arcLeft_n > 1 || !arcLeft || arcLeft->multiplicity > multiCutoff)
183 fprintf (stderr,"%d weak inner edges are destroyed.\n", counter);
188 a) Make the output of SOAPdevovo the same when using different threads.
190 1) contig.c::call_heavygraph()
194 ret = loadPathBin (graphfile);
199 ret = loadPathBin (graphfile);
207 a) fix bugs for merge_linearV2, get the correct coverage of edge, l_links maybe change in the original process
209 1)node2edge.c:merge_linearV2()
211 while (nStack->item_c > 1)
213 temp = (KMER_PT *) stackPop (nStack);
214 del_node = temp->node;
215 del_node->inEdge = 1;
216 symbol += get_kmer_left_covs (*del_node);
220 del_node->l_links = edge_c;
221 del_node->twin = (unsigned char) (bal_edge + 1);
225 del_node->l_links = edge_c + bal_edge;
226 del_node->twin = (unsigned char) (-bal_edge + 1);
229 tightSeq[char_index--] = lastCharInKmer (temp->kmer);
232 while (nStack->item_c > 1)
234 temp = (KMER_PT *) stackPop (nStack);
235 del_node = temp->node;
236 del_node->inEdge = 1;
237 symbol += get_kmer_left_covs (*del_node);
239 tightSeq[char_index--] = lastCharInKmer (temp->kmer);
242 stackRecover(nStack);
243 temp = (KMER_PT *) stackPop (nStack);
244 while(nStack->item_c > 1)
246 temp = (KMER_PT *) stackPop (nStack);
247 del_node = temp->node;
248 del_node->inEdge = 1;
251 del_node->l_links = edge_c;
252 del_node->twin = (unsigned char) (bal_edge + 1);
256 del_node->l_links = edge_c + bal_edge;
257 del_node->twin = (unsigned char) (-bal_edge + 1);
263 a) fix bugs for removeMinorTips
265 1)cutTipPreGraph.c:removeMinorTips ()
267 for (i = 0; i < thrd_num; i++)
277 while (set->iter_ptr < set->size)
279 if (!is_kmer_entity_null (set->flags, set->iter_ptr))
281 rs = set->array + set->iter_ptr;
282 flag += clipTipFromNode (rs, cut_len_tip, 0);
289 fprintf (stderr,"Remove minor tips in kmer set %d is done.\n", i);
296 for (i = 0; i < thrd_num; i++)
301 while (set->iter_ptr < set->size)
303 if (!is_kmer_entity_null (set->flags, set->iter_ptr))
305 rs = set->array + set->iter_ptr;
306 if (!rs->linear && !rs->deleted)
308 flag += clipTipFromNode (rs, cut_len_tip, 0);
314 fprintf (stderr,"Remove minor tips in kmer set is done(round %d).\n", round++);
319 a) merge 63mer and 127mer version
321 1) Use #define MER127 to define 127mer version
322 2) Use #define MER63 to define 63mer version
326 a) fix bugs of AIORead in reading fastq file which has "@" at the beginning of qulity line. Edited by panqi.
328 1) prlHashReads.c::AIORead()
329 a) if((curr_type == 2) || (curr_type == 6))
331 for (i = get - 1; (temp[i] != '@') || (temp[i-1] != '\n') || (temp[i-2] == '+')|| (temp[i-3] == '/'); i--);
332 for (i2 = i - 2; temp[i2] != '\n'; i2--);
333 if (temp[i2 + 1] == '+')
335 for(i3 = i2 - 1; temp[i3] != '\n'; i3--);
336 for(i2 = i3 - 1; temp[i2] != '\n'; i2--);
337 if(temp[i2 + 1] == '@') {i = i2 + 1;}
342 if((curr_type == 2) || (curr_type == 6))
345 for (i = get - 1; (temp[i] != '@') || (temp[i-1] != '\n'); i--)
347 if(temp[i] == '\n') {num++;}
352 for (i2 = i - 2; temp[i2] != '\n'; i2--);
353 if (temp[i2 + 1] == '+')
355 for(i2 = i2 - 1; temp[i2] != '\n'; i2--);
356 if(temp[i2 + 1] != '+') {for (i = i2 - 1; (temp[i] != '@') || (temp[i-1] != '\n'); i--);}
364 a) seperate codes related to visualization from other codes. Edited by panqi.
367 codes related to visualization
370 a) fix bugs of AIORead in reading fastq file which has "@" at the beginning of qulity line. Edited by panqi.
372 1) prlHashReads.c::AIORead()
373 a) if((curr_type == 2) || (curr_type == 6))
374 for (i = get - 1; (temp[i] != '@') || (temp[i-1] != '\n') || (temp[i-2] == '+')|| (temp[i-3] == '/'); i--);
375 else if((curr_type == 1) || (curr_type == 3) ||(curr_type == 5))
376 for (i = get - 1; temp[i] != '>';i--) ;
378 if((curr_type == 2) || (curr_type == 6))
380 for (i = get - 1; (temp[i] != '@') || (temp[i-1] != '\n') || (temp[i-2] == '+')|| (temp[i-3] == '/'); i--);
381 for (i2 = i - 2; temp[i2] != '\n'; i2--);
382 if (temp[i2 + 1] == '+')
384 for(i3 = i2 - 1; temp[i3] != '\n'; i3--);
385 for(i2 = i3 - 1; temp[i2] != '\n'; i2--);
386 if(temp[i2 + 1] == '@') {i = i2 + 1;}
389 else if((curr_type == 1) || (curr_type == 3) ||(curr_type == 5)
390 for (i = get - 1; temp[i] != '>';i--) ;
393 a) fix bug of AIORead to deal with the case that read length is shorter than Kmer size. Edited by panqi.
395 1) prlHashReads.c::prlRead2HashTable()
396 a) while (start1 < offset1 || start2 < offset2)
401 readseqInLib (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), readBuffer1, &start1, offset1, libNo);
402 if ((++i) % 100000000 == 0)
403 printf ("--- %lldth reads\n", i);
404 if (lenBuffer[read_c] < 0)
405 printf ("read len %d\n", lenBuffer[read_c]);
406 if (lenBuffer[read_c] < overlaplen + 1)
413 readseqInLib (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), readBuffer2, &start2, offset2, libNo);
414 if ((++i) % 100000000 == 0)
415 printf ("--- %lldth reads\n", i);
416 if (lenBuffer[read_c] < 0)
417 printf ("read len %d\n", lenBuffer[read_c]);
418 if (lenBuffer[read_c] < overlaplen + 1)
424 while (start1 < offset1 || start2 < offset2)
429 readseqInLib (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), readBuffer1, &start1, offset1, libNo);
430 if ((++i) % 100000000 == 0)
431 printf ("--- %lldth reads\n", i);
432 if (lenBuffer[read_c] < 0)
433 printf ("read len %d\n", lenBuffer[read_c]);
434 if (lenBuffer[read_c] < overlaplen + 1)
439 flag1=AIORead (&aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type);
447 readseqInLib (seqBuffer[read_c], next_name, &(lenBuffer[read_c]), readBuffer2, &start2, offset2, libNo);
448 if ((++i) % 100000000 == 0)
449 printf ("--- %lldth reads\n", i);
450 if (lenBuffer[read_c] < 0)
451 printf ("read len %d\n", lenBuffer[read_c]);
452 if (lenBuffer[read_c] < overlaplen + 1)
454 if((flag2 == 2) && (start2 >= offset2))
456 if(start2 >= offset2)
459 flag2 = AIORead (&aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type);
469 a) add a parameter "-V" so that some information for visualization can be output by setting this parameter. Edited by panqi.
473 codes related to visualization
477 a) Fix a bug of scaffolding which may lead to segmentation fault or endless loop
480 1) Function: orderContig.c::linearC2C()
482 if (usCtgCounter != oldsize)
484 unsigned int prev_c2 = upstreamCTG[usCtgCounter+1];
485 unsigned int bal_prev_c2 = getTwinCtg (prev_c2);
486 setConnectMask (bal_prev_c2, c2, 1);
487 setConnectMask (bal_prev_c2, c2, 0);
489 int i = usCtgCounter+1;
490 for ( ; i <= oldsize; i++)
492 contig_array[upstreamCTG[i]].from_vt = prev_c2;
493 contig_array[getTwinCtg(upstreamCTG[i])].to_vt = bal_prev_c2;
496 if ((cn_temp = getCntBetween(c1,c2)) != NULL)
498 setConnectMask (c1, c2, 0);
499 setConnectDelete (c1, c2, 0, 0);
506 a) Change default value of '-b' from 0 to 1.5. Update relative statements.
508 b) Change default value of '-B' from 0 to 0.6. Update relative statements.
510 c) Update some code so it can read '*.readInGap' and '*.readOnContig' produced by V1.05.
513 1) Variant: inc/globe.h
514 a) float cvg4SNP = 0.0;
518 b) float ins_var_idx=0;
520 float ins_var_idx=1.5
522 2) Function: scaffold.c::display_scaff_usage()
523 a) printf ("\nscaff -g inputGraph [-F -m -u -S -w] [-G gapLenDiff -L minContigLen -c minContigCvg -C maxContigCvg -b insertSizeUpperBound -N genomeSize -p n_cpu]\n");
525 printf ("\nscaff -g inputGraph [-F -m -u -S -w] [-G gapLenDiff -L minContigLen -c minContigCvg -C maxContigCvg -b insertSizeUpperBound -B bubbleCoverage -N genomeSize -p n_cpu]\n");
527 b) printf (" -b <float>\tinsertSizeUpperBound: (b*avg_ins) will be used as upper bound of insert size for long insert size ( > 1000) when handling pair-end connections between contigs if b is set to larger than 1, [0]\n");
529 printf (" -b <float>\tinsertSizeUpperBound: (b*avg_ins) will be used as upper bound of insert size for long insert size ( > 1000) when handling pair-end connections between contigs if b is set to larger than 1, [1.5]\n");
531 c) printf (" -B <float>\tbubbleCoverage: remove contig with lower cvoerage in bubble structure if both contigs' coverage are smaller than bubbleCoverage*avgCvg, [0]\n");
533 printf (" -B <float>\tbubbleCoverage: remove contig with lower cvoerage in bubble structure if both contigs' coverage are smaller than bubbleCoverage*avgCvg, [0.6]\n");
535 3) Variant: main.c::pipeline()
536 a) unsigned char getB is now related with "bubbleCoverage" instead of "insertSizeUpperBound".
539 unsigned char getb; //related with "insertSizeUpperBound".
542 char bubble_coverage_s[16];
545 float bubble_coverage = 0.0;
547 e) int insert_size_bound = 0;
549 float insert_size_bound = 0.0;
551 4) Function: main.c::pipeline()
552 a) while ((copt = getopt (argc, argv, "a:s:o:K:M:L:p:G:d:D:RuFk:fc:C:b:N:w")) != EOF)
554 while ((copt = getopt (argc, argv, "a:s:o:K:M:L:p:G:d:D:RuFk:fc:C:b:B:N:w")) != EOF)
559 sscanf (optarg, "\s", temp);
560 bubble_coverage = atof (temp);
566 options[index++] = "-B";
567 sprintf (bubble_coverage_s, "%f", bubble_coverage);
568 options[index++] = bubble_coverage_s;
571 5) Function: main.c::display_all_usage()
572 a) printf ("\nSOAPdenovo all -s configFile -o outputGraph [-R -f -F -u -w] [-K kmer -p n_cpu -a initMemoryAssumption -d KmerFreqCutOff -D EdgeCovCutoff -M mergeLevel -k kmer_R2C, -G gapLenDiff -L minContigLen -c minContigCvg -C maxContigCvg -b insertSizeUpperBound -N genomeSize]\n");
574 printf ("\nSOAPdenovo all -s configFile -o outputGraph [-R -f -F -u -w] [-K kmer -p n_cpu -a initMemoryAssumption -d KmerFreqCutOff -D EdgeCovCutoff -M mergeLevel -k kmer_R2C, -G gapLenDiff -L minContigLen -c minContigCvg -C maxContigCvg -b insertSizeUpperBound -B bubbleCoverage -N genomeSize]\n");
576 b) printf (" -b <float>\tinsertSizeUpperBound: (b*avg_ins) will be used as upper bound of insert size for large insert size ( > 1000) when handling pair-end connections between contigs if b is set to larger than 1, [0]\n");
578 printf (" -b <float>\tinsertSizeUpperBound: (b*avg_ins) will be used as upper bound of insert size for large insert size ( > 1000) when handling pair-end connections between contigs if b is set to larger than 1, [1.5]\n");
581 printf (" -B <float>\tbubbleCoverage: remove contig with lower cvoerage in bubble structure if both contigs' coverage are smaller than bubbleCoverage*avgCvg, [0.6]\n");
583 6) Function: orderContig.c::PE2Links()
589 b) sprintf (name, "%s.readOnContig.gz", infile);
590 fp = gzopen (name, "r");
592 if (COMPATIBLE_MODE == 1)
594 sprintf(name,"%s.readOnContig",infile);
595 fp1 = ckopen(name,"r");
599 sprintf (name, "%s.readOnContig.gz", infile);
600 fp2 = gzopen (name, "r");
603 c) gzgets (fp, line, lineLen);
605 if (COMPATIBLE_MODE == 1)
607 fgets(line,lineLen,fp1);
611 gzgets (fp2, line, lineLen);
614 d) flag += connectByPE_grad(fp,i,line);
616 if (COMPATIBLE_MODE == 1)
618 flag += connectByPE_grad(fp1,i,line);
622 flag += connectByPE_grad_gz(fp2,i,line);
627 if (COMPATIBLE_MODE == 1)
636 7) Function: inc/extfunc.h
637 a) connectByPE_grad( gzFile* fp, int peGrad, char * line );
639 extern int connectByPE_grad_gz ( gzFile * fp, int peGrad, char * line );
641 8) Function: attachPEinfo.c
643 int connectByPE_grad (FILE * fp, int peGrad, char *line)
645 b) int connectByPE_grad (gzFile * fp, int peGrad, char *line)
647 int connectByPE_grad_gz (gzFile * fp, int peGrad, char *line)
649 9) Function: prlReadFillGap.c::loadReads4gap()
650 a) update some code to read '*.readInGap'.
653 1) Variant: inc/globe.h
657 2) Variant: inc/extvab.h
659 extern float cvg4SNP;
661 3) struct: inc/def.h::struct contig
663 unsigned char bubbleInScaff: 1;
665 4) Function: scaffold.c::initenv()
666 a) while ((copt = getopt (argc, argv, "g:L:p:G:N:c:C:b:FmuSw")) != EOF)
668 while ((copt = getopt (argc, argv, "g:L:p:G:N:c:C:b:B:FmuSw")) != EOF)
672 sscanf (optarg, "%s", temp);
673 cvg4SNP = atof (temp) > 0 ? atof (temp) : 0.0;
676 5) Function: scaffold.c::display_scaff_usage()
678 printf (" -B <float>\tbubbleCoverage: remove contig with lower cvoerage in bubble structure if both contigs' coverage are smaller than bubbleCoverage*avgCvg, [0]\n");
680 6) Function: loadGraph.c::loadUpdatedEdges()
682 contig_array[newIndex].bubbleInScaff = 0;
684 7) Variant: orderContig.c
686 static FILE *snp_fp = NULL;
688 8) Fucntion: orderContig.c::Links2Scaf()
689 a) cvg4SNP = ((double)(0.5*cvgAvg) + sqrt(2*cvgAvg));
693 sprintf(name, "%s.bubbleInScaff", infile);
694 snp_fp = ckopen(name, "w");
696 cvg4SNP = (double)(cvg4SNP*cvgAvg);
704 9) Fucntion: inc/extfunc.h
706 extern void outputTightStr(FILE *fp,char *tightStr,int start,int length, int outputlen,int revS,int *col);
708 10) Fcuntion: prlReadFillGap.c::outputTightStr()
709 a) static void outputTightStr (FILE * fp, char *tightStr, int start, int length, int outputlen, int revS, int *col)
711 void outputTightStr (FILE * fp, char *tightStr, int start, int length, int outputlen, int revS, int *col)
713 11) Function: orderContig.c
715 static void output_ctg(unsigned int ctg,FILE *fo)
717 if(contig_array[ctg].length<1)
720 unsigned int bal_ctg = getTwinCtg(ctg);
722 len = contig_array[ctg].length + overlaplen;
725 if(contig_array[ctg].seq){
726 fprintf(fo,">C%d %4.1f\n",ctg,(double)contig_array[ctg].cvg);
727 outputTightStr(fo,contig_array[ctg].seq,0,len,len,0,&col);
729 else if(contig_array[bal_ctg].seq){
730 fprintf(fo,">C%d %4.1f\n",bal_ctg,(double)contig_array[ctg].cvg);
731 outputTightStr(fo,contig_array[bal_ctg].seq,0,len,len,0,&col);
736 12) Function: orderContig.c::removeSNPCtg()
737 a) unsigned int node1, node2;
739 unsigned int node1, node2, bal_node1, bal_node2;
742 bal_node1 = getTwinCtg(node1);
743 bal_node2 = getTwinCtg(node2);
745 c) getEndKmers(contig_array[getTwinCtg(node1)].seq, len1, 1, firstKmer1, lastKmer1);
747 getEndKmers(contig_array[bal_node1].seq, len1, 1, firstKmer1, lastKmer1);
749 d) getEndKmers(contig_array[getTwinCtg(node2)].seq, len2, 1, firstKmer2, lastKmer2);
751 getEndKmers(contig_array[bal_node2].seq, len2, 1, firstKmer2, lastKmer2);
754 if (contig_array[node1].bubbleInScaff == 0 || contig_array[node2].bubbleInScaff == 0)
756 contig_array[node1].bubbleInScaff = 1;
757 contig_array[bal_node1].bubbleInScaff = 1;
758 contig_array[node2].bubbleInScaff = 1;
759 contig_array[bal_node2].bubbleInScaff = 1;
760 output_ctg(node1, snp_fp);
761 output_ctg(node2, snp_fp);
764 f) transferCnt2RemainNode(getTwinCtg(node2), getTwinCtg(node1));
766 transferCnt2RemainNode(bal_node2, bal_node1);
768 g) transferCnt2RemainNode(getTwinCtg(node1), getTwinCtg(node2));
770 transferCnt2RemainNode(bal_node1, bal_node2);
773 1) Function: orderContig.c::scaffolding()
774 a) if(pre_score == 0)
776 *(unsigned int *)darrayPut(tempArray,prev_p)=0;
779 else if(now_ctg_score == 0)
781 *(unsigned int *)darrayPut(tempArray,j)=0;
783 if(j < tempCounter -1)
789 *(unsigned int *)darrayPut(tempArray,prev_p)=0;
790 contig_array[prev_id].flag = 0;
791 contig_array[getTwinCtg(prev_id)].flag = 0;
794 else if(now_ctg_score == 0)
796 *(unsigned int *)darrayPut(tempArray,j)=0;
797 contig_array[nowid].flag = 0;
798 contig_array[getTwinCtg(nowid)].flag = 0;
800 if(j < tempCounter -1)
804 b) if(contig_array[prev_id].cvg < contig_array[nowid].cvg)
806 *(unsigned int *)darrayPut(tempArray,prev_p)=0;
810 *(unsigned int *)darrayPut(tempArray,j)=0;
811 if(j < tempCounter -1)
815 if(contig_array[prev_id].cvg < contig_array[nowid].cvg)
817 *(unsigned int *)darrayPut(tempArray,prev_p)=0;
818 contig_array[prev_id].flag = 0;
819 contig_array[getTwinCtg(prev_id)].flag = 0;
823 *(unsigned int *)darrayPut(tempArray,j)=0;
824 contig_array[nowid].flag = 0;
825 contig_array[getTwinCtg(nowid)].flag = 0;
826 if(j < tempCounter -1)
830 c) if(score_count < 2)
832 free((void *)score_array);
836 if(score_mask == 1 && num3 + num5 > 5 && score_count < 2)
838 free((void *)score_array);
841 for (j=0; j<num3; j++)
843 ctg = *(unsigned int *)darrayGet(scaf3,j);
844 contig_array[ctg].flag = 0;
845 contig_array[getTwinCtg(ctg)].flag = 0;
847 for (j=0; j<num5; j++)
849 ctg = *(unsigned int *)darrayGet(scaf5,j);
850 contig_array[ctg].flag = 0;
851 contig_array[getTwinCtg(ctg)].flag = 0;
856 d) after obtaining downstream contigs and upstream contigs
860 contig_array[i].flag = 0;
866 1) Function: orderContig.c::checkScafConsist()
867 a) if(!linkCount1||!linkCount2)
871 2) Function: loadGraph.c::
874 cut_len = COMPATIBLE_MODE == 0 ? overlaplen : 0;
876 b) if(length != 0) contig_array[newIndex].length = length - overlaplen;
878 if(length != 0) contig_array[newIndex].length = length - cut_len;
880 c) counter += length - overlaplen;
881 cvgSum += cvg * (length - overlaplen);
883 counter += length - cut_len;
884 cvgSum += cvg * (length - cut_len);
887 1) Function: orderContig.c::checkScafConsist()
888 a) boolean checkScafConsist(STACK *scafStack1, int len1, STACK *scafStack2)
890 boolean checkScafConsist(STACK *scafStack1, int len1, STACK *scafStack2, int len2)
892 b) int linkCount1 = getDSLink2Scaf(scafStack1,downwardTo1,downwardWt1);
893 int linkCount2 = getDSLink2Scaf(scafStack2,downwardTo2,downwardWt2);
895 int linkCount1 = getDSLink2Scaf(scafStack1,downwardTo1,downwardWt1, len1);
896 int linkCount2 = getDSLink2Scaf(scafStack2,downwardTo2,downwardWt2, len2);
899 boolean flag2 = isLinkReliable(downwardWt2,linkCount2);
901 d) if(!flag1||!flag2)
905 e) if (linkCount2 && flag2)
909 2) Function: orderContig.c::detectBreakScaff()
910 a) int flag1 = checkScafConsist(scafStack1,scafStack2);
911 int flag2 = checkScafConsist(scafStack2,scafStack1);
913 int flag1 = checkScafConsist(scafStack1, len1, scafStack2, len2);
914 int flag2 = checkScafConsist(scafStack2, len2, scafStack1, len1);
917 a) int getDSLink2Scaf(STACK *scafStack,DARRAY *SCAF,DARRAY *WT)
919 int getDSLink2Scaf(STACK *scafStack,DARRAY *SCAF,DARRAY *WT, int total_len)
926 bind_cnt = getBindCnt(ctg);
927 gap = bind_cnt ? bind_cnt->gapLen : 0;
928 len += contig_array[ctg].length + gap;
930 d) if ((contig_array[ctg].mask && contig_array[ctg].length < 500) || !contig_array[ctg].downwardConnect)
932 if ((contig_array[ctg].mask && contig_array[ctg].length < 500) || !contig_array[ctg].downwardConnect
933 || total_len - len > Insert_size)
936 1) Function: orderContig.c::getDSLink2Scaf()
938 if (ite_cnt->newIns != 1)
940 ite_cnt = ite_cnt->next;
945 1) Function: orderContig.c::getScaffold()
946 a) len += contig_array[ctg].length;
948 len += bindCnt->gapLen + contig_array[ctg].length;
950 2) Function: orderContig.c::getDSLink2Scaf()
952 unsigned int targetCtg,bal_targetCtg;
955 targetCtg = ite_cnt->contigID;
956 bal_targetCtg = getTwinCtg(targetCtg);
958 c) if ((ite_cnt->mask && contig_array[ctg].length < 500) || ite_cnt->singleInScaf
960 if ((ite_cnt->mask && contig_array[targetCtg].length < 500) || ite_cnt->singleInScaf
962 d) if(contig_array[ctg].from_vt==contig_array[targetCtg].from_vt)
964 if(contig_array[ctg].from_vt==contig_array[targetCtg].from_vt
965 || (targetCtg==contig_array[targetCtg].from_vt && bal_targetCtg==contig_array[bal_targetCtg].from_vt))
968 1) struct: inc/def.h::struct connection
969 a) add new member: unsigned char newIns:1;
970 It's used to indicate whether the PE relationship between contigs is provided by the latest rank
972 2) Function: orderContig.c::inputLinks()
973 a) add variants: CONNECT *cnt, *bal_cnt;
975 b) add1Connect(ctg,toCtg,gap,wt,0);
976 add1Connect(bal_toCtg,bal_ctg,gap,wt,0);
978 cnt = add1Connect(ctg,toCtg,gap,wt,0);
979 bal_cnt = add1Connect(bal_toCtg,bal_ctg,gap,wt,0);
980 if (cnt && insertS > 1000)
982 cnt->newIns = bal_cnt->newIns = 1;
985 3) Function: orderContig.c::detectBreakScaff()
986 a) new function, developed from detectBreakScaf().
988 4) Function: orderContig.c::clearNewInsFlag()
991 5) Function: orderContig.c::Links2Scaf()
992 a) if(i==gradsCounter-1&&!isPrevSmall&&smallPE)
995 if(Insert_size > 1000)
998 if (Insert_size > 1000 && i != gradsCounter-1)
1002 1) Function: readseq1by1.c::openFile4read()
1003 a) fp = popen (cmd, "r");
1008 if ((fp = popen (cmd, "r")) != NULL)
1016 1) Function: readseqfq()
1017 a) int i, n, strL, m, p;
1019 int i, n, strL, m, p=0;
1021 b) else if (buf[m] == '\n' && buf[p] == '\n')
1023 else if (buf[m] == '\n' && buf[p] == '\n' && m>p)
1024 Note: Both modifications are provided by Pan Qi.
1027 1) Function: Links2Scaf()
1028 a) cvg4SNP = ((double)(0.5*cvgAvg) + sqrt(2*cvgAvg));
1030 cvg4SNP = ((double)(0.5*cvgAvg) + 3.0*sqrt(cvgAvg/2.0));
1031 Note: This change maybe not suit for all cases
1033 2) Function: general_linearization()
1034 a) SNPCtgCounter += removeSNPCtg();
1036 if (Insert_size < 10000)
1038 SNPCtgCounter += removeSNPCtg();
1042 1) Function: arrangeNodes_general()
1045 //recover connections
1048 if(temp_cnt && (bySmall || temp_cnt->bySmall || temp_cnt->smallIns || (!dh_cnt || !dh_cnt->bySmall || !dh_cnt->smallIns)))
1050 //recover connections
1053 b) else if (dh_cnt && ((dh_cnt->bySmall || dh_cnt->smallIns || bySmall)
1054 || ((-gap > (int)contig_array[node1].length || -gap > (int)contig_array[node2].length)
1055 && tmp_dis > 0 && tmp_dis < 0.2*Insert_size )))
1057 //exchange connections
1060 else if (dh_cnt && ((dh_cnt->bySmall || dh_cnt->smallIns || bySmall)
1061 || ((-gap > (int)contig_array[node1].length || -gap > (int)contig_array[node2].length)
1062 && tmp_dis > 0 && tmp_dis < 500 )))
1064 //exchange connections
1068 1) Function: createAnalogousCnt()
1069 a) if(gap<GapLowerBound)
1075 if(gap<GapLowerBound)
1078 originCnt->deleted = 1;
1079 temp_cnt = getCntBetween(balSourceStop,balSourceStart);
1084 1) Function: downSlide()
1085 a) annotate the codes related to "lastTop".
1087 2) Function: scaffolding()
1088 a) replace the old codes related to the calculation of the score of contig's connection with the new codes from version 0729/63mer updated by Binghang Liu
1090 b) if(now_ctg_score == 0 && ((j == num3-1 && contig_array[nextid].length < 200 && num3 + num5 > 5)|| (now_cnt_weigth == 0&& j>0 )))
1092 if(score_mask == 1 && now_ctg_score == 0
1093 && ((j == num3-1 && contig_array[nextid].length < 200 && num3 + num5 > 5)|| (now_cnt_weigth == 0&& j>0 )))
1095 c) if(now_ctg_score == 0 && ((j == num5-1 && contig_array[*(unsigned int *)darrayGet(scaf5,j-1)].length <200 && num3 + num5 > 5)|| (j != num5-1 && now_cnt_weigth == 0)))
1097 if(score_mask == 1 && now_ctg_score == 0
1098 && ((j == num5-1 && contig_array[*(unsigned int *)darrayGet(scaf5,j-1)].length <200 && num3 + num5 > 5)
1099 || (j != num5-1 && now_cnt_weigth == 0)))
1101 3) Function: get_ctg_score2()
1102 a) if(dh_cnt && dh_cnt->weight >0)
1105 if(dh_cnt && dh_cnt->weight >0)
1108 4) Function: arrangeNodes_general()
1109 a) else if(dh_cnt && ((dh_cnt->weight > weakPE && (-gap > (int)contig_array[node1].length || -gap > (int)contig_array[node2].length))|| (dh_cnt->bySmall || dh_cnt->smallIns || bySmall)))
1111 else if (dh_cnt && ((dh_cnt->bySmall || dh_cnt->smallIns || bySmall)
1112 || ((-gap > (int)contig_array[node1].length || -gap > (int)contig_array[node2].length)
1113 && tmp_dis > 0 && tmp_dis < 0.2*Insert_size )))
1115 5) Function: removeSNPCtg()
1117 CONNECT *cnt, *bal_cnt;
1118 cnt = getCntBetween(node1, node2);
1119 dh_cnt = getCntBetween(node2, node1);
1121 if (gap >= 0 || contig_array[node1].cvg > cvg4SNP || contig_array[node2].cvg > cvg4SNP)
1123 if (gap >= 0 || contig_array[node1].cvg > cvg4SNP || contig_array[node2].cvg > cvg4SNP || cnt || bal_cnt)
1125 6) Function: outputScafSeq()
1126 a) if (prevCtg && actg->scaftig_start)
1130 gapN = actg->start - prevCtg->start - contig_array[prevCtg->ctgID].length - overlaplen;
1132 else gapN = actg->start - prevCtg->start - contig_array[prevCtg->ctgID].length;
1134 // gapN = actg->start - prevCtg->start - contig_array[prevCtg->ctgID].length; //lzy 0907
1135 gapN = gapN > 0 ? gapN : 1;
1136 outputNs (fo, scaffBuffer, gapN, &column);
1137 ctg_start_pos += gapN;
1138 //outputGapInfo(prevCtg->ctgID,ctg);
1149 start = actg->cutHead;
1154 if (prevCtg && actg->scaftig_start)
1159 gapN = actg->start - prevCtg->start - contig_array[prevCtg->ctgID].length - overlaplen;
1161 else gapN = actg->start - prevCtg->start - contig_array[prevCtg->ctgID].length;
1163 gapN = actg->start - prevCtg->start - contig_array[prevCtg->ctgID].length; //lzy 0907
1164 gapN = gapN > 0 ? gapN : 1;
1165 outputNs (fo, scaffBuffer, gapN, &column);
1166 ctg_start_pos += gapN;
1167 //outputGapInfo(prevCtg->ctgID,ctg);
1177 start = actg->cutHead;
1182 1) Function: removeSNPCtg()
1185 transferCnt2RemainNode()
1186 reason: not only mask SNP contig, but also transfer the PE information from masked SNP contig to remain SNP contig
1189 1) Function: Links2Scaf()
1190 a) cvg4SNP = 0.6*cvgAvg;
1192 cvg4SNP = ((double)(0.5*cvgAvg) + sqrt(2*cvgAvg));
1195 1) recover the function removeSNPCtg()
1197 2) Function: removeSNPCtg()
1198 a) if (len1 > len2 || (len1 == len2 && contig_array[node1].cvg > contig_array[node2].cvg))
1200 if (contig_array[node1].cvg > contig_array[node2].cvg || (len1 > len2 && contig_array[node1].cvg == contig_array[node2].cvg))
1201 REASON: to keep accordance with the operation of merging bubble that remains the edge with higher coverage
1204 1) Function: arrangeNodes_general()
1205 a) if (comCount > 0 && abs(adjustedGap/comCount) > abs(gap))
1209 b) if (bySmall > 0 ||(-gap > (int)contig_array[node1].length || -gap > (int)contig_array[node2].length)
1210 && (i != nodeCounter-1))
1212 if (bySmall > 0 && gap < 0
1213 ||((-gap > (int)contig_array[node1].length || -gap > (int)contig_array[node2].length)
1214 && (i != nodeCounter-1)))
1216 2) Function: getCntNodes()
1217 a) if (cnt->weight < 3)
1219 if (0 == bySmall && cnt->weight < 3 && !cnt->smallIns && !cnt->bySmall)
1221 3) Function: calGapLen()
1222 a) if (cnt && cnt->weight>=3)
1224 if (cnt && (cnt->weight>=3 || bySmall || cnt->smallIns || cnt->bySmall))
1227 1) Function: arrangeNodes_general()
1228 a) else if(dh_cnt && ((dh_cnt->weight > weakPE && (-gap > (int)contig_array[node1].length || -gap > (int)contig_array[node2].length))|| (dh_cnt->bySmall && gap < -overlaplen )))
1230 else if(dh_cnt && ((dh_cnt->weight > weakPE && (-gap > (int)contig_array[node1].length || -gap > (int)contig_array[node2].length))|| (dh_cnt->bySmall || dh_cnt->smallIns || bySmall)))
1232 b) if ((-gap > (int)contig_array[node1].length || -gap > (int)contig_array[node2].length) && (i != nodeCounter-1))
1235 ||(-gap > (int)contig_array[node1].length || -gap > (int)contig_array[node2].length) && (i != nodeCounter-1))
1237 c) if (comCount > 0 && adjustedGap/comCount) > gap)
1239 if (comCount > 0 && abs(adjustedGap/comCount) > abs(gap))
1241 2) Function: Links2Scaf()
1242 a) bySmall = Insert_size > 1000 ? 2 : 0;
1244 bySmall = Insert_size > 1000 ? 0 : 1;