4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
/*
 * print_kmer: writes the four fields kmer.high1, kmer.low1, kmer.high2 and
 * kmer.low2 to fp as hexadecimal numbers separated by single spaces.
 * The parameter c is not used on the lines visible in this excerpt.
 * NOTE(review): "%llx" requires unsigned long long arguments; the field types
 * of Kmer are declared elsewhere -- confirm they match.
 */
29 void print_kmer(FILE *fp,Kmer kmer,char c)
31 fprintf(fp,"%llx %llx %llx %llx",kmer.high1,kmer.low1,kmer.high2,kmer.low2);
36 /*************************************************
42 1. tightSeq: the sequence
43 2. len: the length of sequence
48 *************************************************/
/*
 * printTightString: prints the first len positions of tightSeq to stderr,
 * one character per position. Each position is read with
 * getCharInTightString() and converted to a printable character with
 * int2base(). Nothing is returned; the only effect is output on stderr.
 * NOTE(review): the declaration of i and the block braces are not visible in
 * this excerpt.
 */
50 void printTightString ( char * tightSeq
, int len
)
/* Visit positions 0 .. len-1 in order. */
54 for ( i
= 0; i
< len
; i
++ )
/* Fetch the value stored at position i and print its character form. */
56 fprintf ( stderr
, "%c", int2base ( ( int ) getCharInTightString ( tightSeq
, i
) ) );
/* Break the line after every 100th character, except after the last
   position, which is terminated by the final newline below instead. */
58 if ( ( i
+ 1 ) % 100 == 0 && i
< len
- 1 )
60 fprintf ( stderr
, "\n" );
/* Terminating newline (presumably emitted once, after the loop -- the closing
   brace of the loop is not visible here). */
64 fprintf ( stderr
, "\n" );
67 /*************************************************
71 Writes a base at the specified position of the sequence.
74 2. tightSeq: the sequence
80 *************************************************/
/*
 * writeChar2tightString: stores the value nt at position pos of tightSeq
 * (per the header comment of this function).
 * Only the address computation is visible in this excerpt; the statements that
 * actually modify the byte are not shown here.
 */
81 void writeChar2tightString ( char nt
, char * tightSeq
, int pos
)
/* pos / 4 selects the byte, i.e. four consecutive positions share one byte. */
83 char * byte
= tightSeq
+ pos
/ 4;
106 /*************************************************
110 Gets the base at the specified position of the sequence.
112 1. tightSeq: the sequence
118 *************************************************/
/*
 * getCharInTightString: reads the value stored at position pos of tightSeq.
 * Four positions share one byte (pos / 4 selects the byte), and each return
 * below extracts one 2-bit field of that byte, giving a result in 0..3.
 * NOTE(review): the control flow that picks one of the four returns is not
 * visible in this excerpt (presumably a switch on pos % 4 -- confirm which
 * remainder maps to which bit field).
 */
119 char getCharInTightString ( char * tightSeq
, int pos
)
/* Address of the byte that holds position pos. */
121 char * byte
= tightSeq
+ pos
/ 4;
/* Bits 0-1 (mask 3), no shift needed. */
126 return ( *byte
& 3 );
/* Bits 2-3 (mask 12), shifted down by 2. */
128 return ( *byte
& 12 ) >> 2;
/* Bits 4-5 (mask 48), shifted down by 4. */
130 return ( *byte
& 48 ) >> 4;
/* Bits 6-7 (mask 192), shifted down by 6; masking happens after integer
   promotion, so the result is non-negative whatever the signedness of char. */
132 return ( *byte
& 192 ) >> 6;
138 /*************************************************
142 Gets the reverse complement of a sequence.
145 2. len: the length of the sequence
147 1. bal_seq: the reversed complement of the sequence
150 *************************************************/
/*
 * reverseComplementSeq: fills bal_seq with the reverse complement of seq.
 * seq is walked from its last element (len - 1) down to its first, and
 * int_comp() of each element is written to successive slots of bal_seq.
 * The caller supplies bal_seq; one element is written per element of seq.
 * NOTE(review): the declaration and initial value of index are not visible in
 * this excerpt (presumably 0 -- confirm).
 */
151 void reverseComplementSeq ( char * seq
, int len
, char * bal_seq
)
/* Read seq back to front ... */
160 for ( i
= len
- 1; i
>= 0; i
-- )
/* ... and write the complemented element front to back. */
162 bal_seq
[index
++] = int_comp ( seq
[i
] );
168 /*************************************************
172 Gets the reverse complement of a sequence.
175 2. len: the length of the sequence
179 The reversed complement of sequence "seq".
180 *************************************************/
/*
 * compl_int_seq: builds the reverse complement of seq in a newly allocated
 * buffer of len chars obtained from ckalloc().
 * seq is walked from index len - 1 down to 0 and one value is appended to the
 * new buffer per step.
 * NOTE(review): the assignment of c, any branching around int_comp(), the
 * initialisation of index and the return statement are not visible in this
 * excerpt; per the header comment the buffer is returned, in which case the
 * caller presumably owns it -- confirm.
 */
181 char * compl_int_seq ( char * seq
, int len
)
/* bal_seq starts as NULL; c and bal_c are plain char temporaries. */
183 char * bal_seq
= NULL
, c
, bal_c
;
/* Room for exactly len elements (no terminator is reserved). */
191 bal_seq
= ( char * ) ckalloc ( len
* sizeof ( char ) );
/* Traverse the input from its last element to its first. */
194 for ( i
= len
- 1; i
>= 0; i
-- )
/* Complement of the current element. */
200 bal_c
= int_comp ( c
);
/* Append to the output in forward order. */
207 bal_seq
[index
++] = bal_c
;
213 /*************************************************
217 Translates the prefix of a sequence to an integer.
219 1. seq: the sequence in the format "char"
220 2. len: the length of the prefix
225 *************************************************/
/*
 * trans_seq: folds the first len elements of seq into one integer by treating
 * them as base-4 digits, most significant digit first
 * (res = res * 4 + seq[i] for i = 0 .. len - 1).
 * NOTE(review): the declaration/initial value of res and the return statement
 * are not visible in this excerpt (presumably res starts at 0 and is
 * returned -- confirm).
 * NOTE(review): every step multiplies res by 4, so a large len can overflow
 * long long; no bound on len is checked on the visible lines.
 */
226 long long trans_seq ( char * seq
, int len
)
/* Consume the prefix left to right. */
232 for ( i
= 0; i
< len
; i
++ )
/* Shift the accumulated value up by one base-4 digit and add the next one. */
234 res
= res
* 4 + seq
[i
];
241 char *kmer2seq(Kmer word)
247 seq = (char *)ckalloc(overlaplen*sizeof(char));
248 for(i=overlaplen-1;i>=0;i--){
249 seq[i] = charMask&word;