/*
 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
 *
 * This file is part of SOAPdenovo.
 *
 * SOAPdenovo is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SOAPdenovo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with SOAPdenovo.  If not, see <http://www.gnu.org/licenses/>.
 */
29 /*************************************************
33 Records the id of read which crosses the edge.
35 1. edgeno: the edge index
36 2. readid: the read id
41 *************************************************/
42 static void add1marker2edge ( unsigned int edgeno
, long long readid
)
44 if ( edge_array
[edgeno
].multi
== 255 )
49 unsigned int bal_ed
= getTwinEdge ( edgeno
);
50 unsigned char counter
= edge_array
[edgeno
].multi
++;
51 edge_array
[edgeno
].markers
[counter
] = readid
;
52 counter
= edge_array
[bal_ed
].multi
++;
53 edge_array
[bal_ed
].markers
[counter
] = -readid
;
56 /*************************************************
60 1. Loads the path info.
61 2. Records the ids of reads crossing edges.
63 1. graphfile: the input prefix
68 *************************************************/
69 boolean
loadPath ( char * graphfile
)
72 char name
[256], line
[1024];
73 unsigned int i
, bal_ed
, num1
, edgeno
, num2
;
74 long long markCounter
= 0, readid
= 0;
76 sprintf ( name
, "%s.markOnEdge", graphfile
);
77 fp
= fopen ( name
, "r" );
84 for ( i
= 1; i
<= num_ed
; i
++ )
86 edge_array
[i
].multi
= 0;
89 for ( i
= 1; i
<= num_ed
; i
++ )
91 fscanf ( fp
, "%d", &num1
);
93 if ( EdSmallerThanTwin ( i
) )
95 fscanf ( fp
, "%d", &num2
);
96 bal_ed
= getTwinEdge ( i
);
98 if ( num1
+ num2
>= 255 )
100 edge_array
[i
].multi
= 255;
101 edge_array
[bal_ed
].multi
= 255;
105 edge_array
[i
].multi
= num1
+ num2
;
106 edge_array
[bal_ed
].multi
= num1
+ num2
;
107 markCounter
+= 2 * ( num1
+ num2
);
114 if ( 2 * num1
>= 255 )
116 edge_array
[i
].multi
= 255;
120 edge_array
[i
].multi
= 2 * num1
;
121 markCounter
+= 2 * num1
;
127 fprintf ( stderr
, "%lld markers overall.\n", markCounter
);
128 markersArray
= ( long long * ) ckalloc ( markCounter
* sizeof ( long long ) );
131 for ( i
= 1; i
<= num_ed
; i
++ )
133 if ( edge_array
[i
].multi
== 255 )
138 edge_array
[i
].markers
= markersArray
+ markCounter
;
139 markCounter
+= edge_array
[i
].multi
;
140 edge_array
[i
].multi
= 0;
143 sprintf ( name
, "%s.path", graphfile
);
144 fp
= fopen ( name
, "r" );
151 while ( fgets ( line
, sizeof ( line
), fp
) != NULL
)
155 seg
= strtok ( line
, " " );
159 edgeno
= atoi ( seg
);
160 //printf("%s, %d\n",seg,edgeno);
161 add1marker2edge ( edgeno
, readid
);
162 seg
= strtok ( NULL
, " " );
169 for ( i
= 1; i
<= num_ed
; i
++ )
171 if ( edge_array
[i
].multi
== 255 )
176 markCounter
+= edge_array
[i
].multi
;
179 fprintf ( stderr
, "%lld marks loaded.\n", markCounter
);
/*
 * qsort() comparator that orders long long values by ascending absolute
 * value, so a read id and its negated form compare equal.
 *
 * a, b: pointers to the two long long elements being compared.
 * Returns -1, 0 or 1 when |*a| is smaller than, equal to or larger than |*b|.
 */
static int comp ( const void * a, const void * b )
{
	const long long lhs = * ( const long long * ) a;
	const long long rhs = * ( const long long * ) b;
	/* Magnitudes are taken in unsigned arithmetic: negating LLONG_MIN as a
	 * signed value would overflow. */
	const unsigned long long m = lhs > 0 ? ( unsigned long long ) lhs : 0ULL - ( unsigned long long ) lhs;
	const unsigned long long n = rhs > 0 ? ( unsigned long long ) rhs : 0ULL - ( unsigned long long ) rhs;

	/* Explicit three-way compare: narrowing the difference (m - n) to int
	 * could truncate it and report the wrong sign. */
	return ( m > n ) - ( m < n );
}
198 /*************************************************
202 1. Loads the path info.
203 2. Records the ids of reads crossing edges.
205 1. graphfile: the input prefix
209 0 if it's fail to load the path.
210 *************************************************/
212 boolean
loadPathBin ( char * graphfile
)
216 unsigned int i
, bal_ed
, num1
, num2
;
217 long long markCounter
= 0, readid
= 0;
218 unsigned char seg
, ch
;
219 unsigned int * freadBuf
;
220 sprintf ( name
, "%s.markOnEdge", graphfile
);
221 fp
= fopen ( name
, "r" );
228 for ( i
= 1; i
<= num_ed
; i
++ )
230 edge_array
[i
].multi
= 0;
231 edge_array
[i
].markers
= NULL
;
234 for ( i
= 1; i
<= num_ed
; i
++ )
236 fscanf ( fp
, "%d", &num1
);
238 if ( EdSmallerThanTwin ( i
) )
240 fscanf ( fp
, "%d", &num2
);
241 bal_ed
= getTwinEdge ( i
);
243 if ( num1
+ num2
>= 255 )
245 edge_array
[i
].multi
= 255;
246 edge_array
[bal_ed
].multi
= 255;
250 edge_array
[i
].multi
= num1
+ num2
;
251 edge_array
[bal_ed
].multi
= num1
+ num2
;
252 markCounter
+= 2 * ( num1
+ num2
);
259 if ( 2 * num1
>= 255 )
261 edge_array
[i
].multi
= 255;
265 edge_array
[i
].multi
= 2 * num1
;
266 markCounter
+= 2 * num1
;
272 fprintf ( stderr
, "%lld markers overall.\n", markCounter
);
273 markersArray
= ( long long * ) ckalloc ( markCounter
* sizeof ( long long ) );
276 for ( i
= 1; i
<= num_ed
; i
++ )
278 if ( edge_array
[i
].multi
== 255 )
283 edge_array
[i
].markers
= markersArray
+ markCounter
;
284 markCounter
+= edge_array
[i
].multi
;
285 edge_array
[i
].multi
= 0;
288 sprintf ( name
, "%s.path", graphfile
);
289 fp
= fopen ( name
, "rb" );
296 freadBuf
= ( unsigned int * ) ckalloc ( ( maxReadLen
- overlaplen
+ 1 ) * sizeof ( unsigned int ) );
298 while ( fread ( &ch
, sizeof ( char ), 1, fp
) == 1 )
301 if ( fread ( freadBuf
, sizeof ( unsigned int ), ch
, fp
) != ch
)
308 for ( seg
= 0; seg
< ch
; seg
++ )
310 add1marker2edge ( freadBuf
[seg
], readid
);
317 for ( i
= 1; i
<= num_ed
; i
++ )
319 if ( edge_array
[i
].multi
== 255 )
324 markCounter
+= edge_array
[i
].multi
;
327 for ( i
= 0; i
<= num_ed
; i
++ )
329 if ( edge_array
[i
].multi
>= 2 && edge_array
[i
].multi
!= 255 )
330 { qsort ( edge_array
[i
].markers
, ( int ) edge_array
[i
].multi
, sizeof ( long long ), comp
); }
333 fprintf ( stderr
, "%lld markers loaded.\n", markCounter
);
334 free ( ( void * ) freadBuf
);