modified: makefile
[GalaxyCodeBases.git] / BGI / SOAPdenovo2 / standardPregraph / seq.c
bloba24ce6ea7da7dc5581477a2d7b03a3b317bb8d11
1 /*
2 * seq.c
4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
23 #include "stdinc.h"
24 #include "newhash.h"
25 #include "kmerhash.h"
26 #include "extfunc.h"
27 #include "extvab.h"
29 void print_kmer(FILE *fp,Kmer kmer,char c)
31 fprintf(fp,"%llx %llx %llx %llx",kmer.high1,kmer.low1,kmer.high2,kmer.low2);
32 fprintf(fp,"%c",c);
34 }*/
/*************************************************
Function:
    printTightString
Description:
    Prints a packed sequence to stderr, breaking the output
    after every 100 bases and ending it with a newline.
Input:
    1. tightSeq:    the packed sequence
    2. len:         the number of bases to print
Output:
    None.
Return:
    None.
*************************************************/
void printTightString ( char * tightSeq, int len )
{
	int pos = 0;

	while ( pos < len )
	{
		/* fetch the stored code, then let int2base turn it into a printable character */
		int code = ( int ) getCharInTightString ( tightSeq, pos );

		fprintf ( stderr, "%c", int2base ( code ) );
		pos++;

		/* wrap after each full group of 100 bases, except after the very last base */
		if ( pos % 100 == 0 && pos < len )
		{
			fprintf ( stderr, "\n" );
		}
	}

	/* the terminating newline is written even when len <= 0 */
	fprintf ( stderr, "\n" );
}
/*************************************************
Function:
    writeChar2tightString
Description:
    Writes a base at the specified position of a packed sequence.
    Each byte holds four bases of two bits each; the base with the
    lowest position in a byte occupies the two most significant bits.
Input:
    1. nt:          the base code; only its two low bits are stored
    2. tightSeq:    the packed sequence
    3. pos:         the position of the base (negative values are ignored)
Output:
    None.
Return:
    None.
*************************************************/
void writeChar2tightString ( char nt, char * tightSeq, int pos )
{
	unsigned char * cell;
	unsigned int shift, code;

	/* a negative position cannot address a base inside the buffer */
	if ( pos < 0 )
	{
		return;
	}

	/* work on unsigned bytes so the result does not depend on the signedness of char */
	cell = ( unsigned char * ) tightSeq + pos / 4;
	/* slot 0 -> bits 7..6, slot 1 -> bits 5..4, slot 2 -> bits 3..2, slot 3 -> bits 1..0 */
	shift = 6u - 2u * ( unsigned int ) ( pos % 4 );
	/* keep only two bits so an out-of-range code cannot spill into a neighbouring base */
	code = ( unsigned int ) ( unsigned char ) nt & 3u;
	/* clear the target slot, then merge the new code in */
	*cell = ( unsigned char ) ( ( *cell & ~ ( 3u << shift ) ) | ( code << shift ) );
}
/*************************************************
Function:
    getCharInTightString
Description:
    Gets the base at the specified position of a packed sequence.
    Each byte holds four bases of two bits each; the base with the
    lowest position in a byte occupies the two most significant bits.
Input:
    1. tightSeq:    the packed sequence
    2. pos:         the position of the base
Output:
    None.
Return:
    The two-bit code of the target base, or 0 when pos % 4 is negative.
*************************************************/
char getCharInTightString ( char * tightSeq, int pos )
{
	int slot = pos % 4;
	unsigned char packed;

	/* a negative remainder selects no slot; the result is 0 in that case */
	if ( slot < 0 )
	{
		return 0;
	}

	packed = ( unsigned char ) tightSeq[pos / 4];
	/* slot 0 -> bits 7..6, slot 1 -> bits 5..4, slot 2 -> bits 3..2, slot 3 -> bits 1..0 */
	return ( char ) ( ( packed >> ( 6 - 2 * slot ) ) & 3 );
}
/*************************************************
Function:
    reverseComplementSeq
Description:
    Walks the sequence from its last base to its first and stores
    int_comp() of each base into bal_seq, front to back.
Input:
    1. seq:         the sequence
    2. len:         the length of the sequence
Output:
    1. bal_seq:     the reversed complement of the sequence
                    (left untouched when len < 1)
Return:
    None.
*************************************************/
void reverseComplementSeq ( char * seq, int len, char * bal_seq )
{
	char * out = bal_seq;
	int remaining = len;

	/* nothing is written for an empty or negative length */
	while ( remaining > 0 )
	{
		remaining--;
		*out = int_comp ( seq[remaining] );
		out++;
	}
}
/*************************************************
Function:
    compl_int_seq
Description:
    Builds the reverse complement of a sequence in a buffer
    obtained from ckalloc(). Codes below 4 are passed through
    int_comp(); any other code is copied unchanged.
Input:
    1. seq:         the sequence
    2. len:         the length of the sequence
Output:
    None.
Return:
    The reversed complement of sequence "seq" (len entries),
    or NULL when len < 1.
*************************************************/
char * compl_int_seq ( char * seq, int len )
{
	char * bal_seq;
	int src, dst;

	if ( len < 1 )
	{
		return NULL;
	}

	bal_seq = ( char * ) ckalloc ( len * sizeof ( char ) );

	/* read seq back to front while filling bal_seq front to back */
	for ( dst = 0, src = len - 1; src >= 0; src--, dst++ )
	{
		char code = seq[src];

		if ( code < 4 )
		{
			bal_seq[dst] = int_comp ( code );
		}
		else
		{
			bal_seq[dst] = code;
		}
	}

	return bal_seq;
}
/*************************************************
Function:
    trans_seq
Description:
    Translates the prefix of a sequence to an integer by treating
    the entries as base-4 digits, first entry most significant.
Input:
    1. seq:         the sequence in the format "char"
    2. len:         the length of the prefix
Output:
    None.
Return:
    The integer; 0 when len < 1. Prefixes that do not fit into
    64 bits wrap around instead of overflowing a signed value.
*************************************************/
long long trans_seq ( char * seq, int len )
{
	/* accumulate unsigned: signed overflow would be undefined for long prefixes */
	unsigned long long acc = 0;
	int i;

	for ( i = 0; i < len; i++ )
	{
		acc = acc * 4 + ( unsigned long long ) seq[i];
	}

	/* single conversion back to the signed return type expected by callers */
	return ( long long ) acc;
}
241 char *kmer2seq(Kmer word)
243 int i;
244 char *seq;
245 Kmer charMask = 3;
247 seq = (char *)ckalloc(overlaplen*sizeof(char));
248 for(i=overlaplen-1;i>=0;i--){
249 seq[i] = charMask&word;
250 word >>= 2;
252 return seq;