4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
33 static int scafBufSize
= 100;
34 static STACK
** ctgStackBuffer
;
35 static int scafCounter
;
38 static void convertIndex ()
40 int * length_array
= ( int * ) ckalloc ( ( num_ctg
+ 1 ) * sizeof ( int ) );
43 for ( i
= 1; i
<= num_ctg
; i
++ )
48 for ( i
= 1; i
<= num_ctg
; i
++ )
50 if ( index_array
[i
] > 0 )
52 length_array
[index_array
[i
]] = i
;
56 for ( i
= 1; i
<= num_ctg
; i
++ )
58 index_array
[i
] = length_array
[i
];
59 } //contig i with new index: index_array[i]
61 free ( ( void * ) length_array
);
64 static void reverseStack ( STACK
* dStack
, STACK
* sStack
)
66 CTGinSCAF
* actg
, *ctgPt
;
67 emptyStack ( dStack
);
69 while ( ( actg
= ( CTGinSCAF
* ) stackPop ( sStack
) ) != NULL
)
71 ctgPt
= ( CTGinSCAF
* ) stackPush ( dStack
);
72 ctgPt
->ctgID
= actg
->ctgID
;
73 ctgPt
->start
= actg
->start
;
74 ctgPt
->end
= actg
->end
;
77 stackBackup ( dStack
);
80 static void initStackBuf ( STACK
** ctgStackBuffer
, int scafBufSize
)
84 for ( i
= 0; i
< scafBufSize
; i
++ )
86 ctgStackBuffer
[i
] = ( STACK
* ) createStack ( 100, sizeof ( CTGinSCAF
) );
89 static void freeStackBuf ( STACK
** ctgStackBuffer
, int scafBufSize
)
93 for ( i
= 0; i
< scafBufSize
; i
++ )
95 freeStack ( ctgStackBuffer
[i
] );
99 static void mapCtg2Scaf ( int scafInBuf
)
104 unsigned int ctg
, bal_ctg
;
106 for ( i
= 0; i
< scafInBuf
; i
++ )
108 scafID
= scafCounter
+ i
+ 1;
109 ctgsStack
= ctgStackBuffer
[i
];
111 while ( ( actg
= stackPop ( ctgsStack
) ) != NULL
)
114 bal_ctg
= getTwinCtg ( ctg
);
116 if ( contig_array
[ctg
].from_vt
!= 0 )
118 contig_array
[ctg
].multi
= 1;
119 contig_array
[bal_ctg
].multi
= 1;
123 contig_array
[ctg
].from_vt
= scafID
;
124 contig_array
[ctg
].to_vt
= actg
->start
;
125 contig_array
[ctg
].flag
= 0; //ctg and scaf on the same strand
126 contig_array
[bal_ctg
].from_vt
= scafID
;
127 contig_array
[bal_ctg
].to_vt
= actg
->start
;
128 contig_array
[bal_ctg
].flag
= 1;
133 static void locateContigOnscaff ( char * graphfile
)
138 STACK
* ctgStack
, *aStack
;
139 int index
= 0, counter
, overallLen
;
140 int starter
, prev_start
, gapN
, scafLen
;
141 unsigned int ctg
, prev_ctg
= 0;
143 for ( ctg
= 1; ctg
<= num_ctg
; ctg
++ )
145 contig_array
[ctg
].from_vt
= 0;
146 contig_array
[ctg
].multi
= 0;
149 ctgStack
= ( STACK
* ) createStack ( 1000, sizeof ( CTGinSCAF
) );
150 sprintf ( line
, "%s.scaf_gap", graphfile
);
151 fp
= ckopen ( line
, "r" );
152 ctgStackBuffer
= ( STACK
** ) ckalloc ( scafBufSize
* sizeof ( STACK
* ) );
153 initStackBuf ( ctgStackBuffer
, scafBufSize
);
154 Ncounter
= scafCounter
= scafInBuf
= allGaps
= 0;
156 while ( fgets ( line
, sizeof ( line
), fp
) != NULL
)
158 if ( line
[0] == '>' )
162 aStack
= ctgStackBuffer
[scafInBuf
++];
163 reverseStack ( aStack
, ctgStack
);
165 if ( scafInBuf
== scafBufSize
)
167 mapCtg2Scaf ( scafInBuf
);
168 scafCounter
+= scafInBuf
;
172 if ( index
% 1000 == 0 )
174 fprintf ( stderr
, "Processed %d scaffolds.\n", index
);
179 scafLen
= prev_ctg
= 0;
180 emptyStack ( ctgStack
);
181 sscanf ( line
+ 9, "%d %d %d", &index
, &counter
, &overallLen
);
182 //fprintf(stderr,">%d\n",index);
186 if ( line
[0] == 'G' ) // gap appears
191 if ( line
[0] >= '0' && line
[0] <= '9' ) // a contig line
193 sscanf ( line
, "%d %d", &ctg
, &starter
);
194 actg
= ( CTGinSCAF
* ) stackPush ( ctgStack
);
199 actg
->start
= scafLen
;
200 actg
->end
= actg
->start
+ overlaplen
+ contig_array
[ctg
].length
- 1;
204 gapN
= starter
- prev_start
- ( int ) contig_array
[prev_ctg
].length
;
205 gapN
= gapN
< 1 ? 1 : gapN
;
206 actg
->start
= scafLen
+ gapN
;
207 actg
->end
= actg
->start
+ contig_array
[ctg
].length
- 1;
210 //fprintf(stderr,"%d\t%d\n",actg->start,actg->end);
211 scafLen
= actg
->end
+ 1;
213 prev_start
= starter
;
219 aStack
= ctgStackBuffer
[scafInBuf
++];
220 reverseStack ( aStack
, ctgStack
);
221 mapCtg2Scaf ( scafInBuf
);
226 for ( ctg
= 1; ctg
<= num_ctg
; ctg
++ )
228 if ( contig_array
[ctg
].from_vt
== 0 || contig_array
[ctg
].multi
== 1 )
236 fprintf ( stderr
, "\nDone with %d scaffolds, %d contigs in Scaffolld\n", index
, gapN
);
239 freeDarray(readSeqInGap);
242 freeStack ( ctgStack
);
243 freeStackBuf ( ctgStackBuffer
, scafBufSize
);
244 free ( ( void * ) ctgStackBuffer
);
247 static boolean
contigElligible ( unsigned int contigno
)
249 unsigned int ctg
= index_array
[contigno
];
251 if ( contig_array
[ctg
].from_vt
== 0 || contig_array
[ctg
].multi
== 1 )
260 static void output1read ( FILE * fo
, long long readno
, unsigned int contigno
, int pos
)
262 unsigned int ctg
= index_array
[contigno
];
265 pos
= pos
< 0 ? 0 : pos
;
267 if ( contig_array
[ctg
].flag
== 0 )
269 posOnScaf
= contig_array
[ctg
].to_vt
+ pos
- overlaplen
;
274 posOnScaf
= contig_array
[ctg
].to_vt
+ contig_array
[ctg
].length
- pos
;
280 printf("Read %lld in region from %d, extend %d, pos %d, orien %c\n",
281 readno,contig_array[ctg].to_vt,contig_array[ctg].length,posOnScaf,orien);
283 fprintf ( fo
, "%lld\t%d\t%d\t%c\n", readno
, contig_array
[ctg
].from_vt
, posOnScaf
, orien
);
286 void locateReadOnScaf ( char * graphfile
)
288 char name
[1024], line
[1024];
290 long long readno
, counter
= 0, pre_readno
= 0;
291 unsigned int contigno
, pre_contigno
;
293 locateContigOnscaff ( graphfile
);
294 sprintf ( name
, "%s.readOnContig", graphfile
);
295 fp
= ckopen ( name
, "r" );
296 sprintf ( name
, "%s.readOnScaf", graphfile
);
297 fo
= ckopen ( name
, "w" );
305 fgets ( line
, 1024, fp
);
307 while ( fgets ( line
, 1024, fp
) != NULL
)
309 sscanf ( line
, "%lld %d %d", &readno
, &contigno
, &pos
);
311 if ( ( readno
% 2 == 0 ) && ( pre_readno
== readno
- 1 ) // they are a pair of reads
312 && contigElligible ( pre_contigno
) && contigElligible ( contigno
) )
314 output1read ( fo
, pre_readno
, pre_contigno
, pre_pos
);
315 output1read ( fo
, readno
, contigno
, pos
);
320 pre_contigno
= contigno
;
324 fprintf ( stderr
, "%lld pair(s) on contig\n", counter
);