modified: src1/input.c
[GalaxyCodeBases.git] / c_cpp / lib / htslib / kstring.c
blob06e8cdf634243333fed97a6eb7b325aeb6df9d3f
1 /* The MIT License
3 Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 SOFTWARE.
26 #include <config.h>
28 #include <stdarg.h>
29 #include <stdio.h>
30 #include <ctype.h>
31 #include <string.h>
32 #include <stdint.h>
33 #include <math.h>
34 #include "htslib/kstring.h"
36 int kputd(double d, kstring_t *s) {
37 int len = 0;
38 char buf[21], *cp = buf+20, *ep;
39 if (d == 0) {
40 if (signbit(d)) {
41 kputsn("-0",2,s);
42 return 2;
43 } else {
44 kputsn("0",1,s);
45 return 1;
49 if (d < 0) {
50 kputc('-',s);
51 len = 1;
52 d=-d;
54 if (!(d >= 0.0001 && d <= 999999)) {
55 if (ks_resize(s, s->l + 50) < 0)
56 return EOF;
57 // We let stdio handle the exponent cases
58 int s2 = sprintf(s->s + s->l, "%g", d);
59 len += s2;
60 s->l += s2;
61 return len;
64 uint64_t i = d*10000000000;
65 // Correction for rounding - rather ugly
67 // Optimised for small numbers.
68 // Better still would be __builtin_clz on hi/lo 32 and get the
69 // starting point very rapidly.
70 if (d<.0001)
71 i+=0;
72 else if (d<0.001)
73 i+=5;
74 else if (d < 0.01)
75 i+=50;
76 else if (d < 0.1)
77 i+=500;
78 else if (d < 1)
79 i+=5000;
80 else if (d < 10)
81 i+=50000;
82 else if (d < 100)
83 i+=500000;
84 else if (d < 1000)
85 i+=5000000;
86 else if (d < 10000)
87 i+=50000000;
88 else if (d < 100000)
89 i+=500000000;
90 else
91 i+=5000000000;
93 do {
94 *--cp = '0' + i%10;
95 i /= 10;
96 } while (i >= 1);
97 buf[20] = 0;
98 int p = buf+20-cp;
99 if (p <= 10) { // d < 1
100 //assert(d/1);
101 cp[6] = 0; ep = cp+5;// 6 precision
102 while (p < 10) {
103 *--cp = '0';
104 p++;
106 *--cp = '.';
107 *--cp = '0';
108 } else {
109 char *xp = --cp;
110 while (p > 10) {
111 xp[0] = xp[1];
112 p--;
113 xp++;
115 xp[0] = '.';
116 cp[7] = 0; ep=cp+6;
117 if (cp[6] == '.') cp[6] = 0;
120 // Cull trailing zeros
121 while (*ep == '0' && ep > cp)
122 ep--;
123 char *z = ep+1;
124 while (ep > cp) {
125 if (*ep == '.') {
126 if (z[-1] == '.')
127 z[-1] = 0;
128 else
129 z[0] = 0;
130 break;
132 ep--;
135 int sl = strlen(cp);
136 len += sl;
137 kputsn(cp, sl, s);
138 return len;
141 int kvsprintf(kstring_t *s, const char *fmt, va_list ap)
143 va_list args;
144 int l;
145 va_copy(args, ap);
147 if (fmt[0] == '%' && fmt[1] == 'g' && fmt[2] == 0) {
148 double d = va_arg(args, double);
149 l = kputd(d, s);
150 va_end(args);
151 return l;
154 l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); // This line does not work with glibc 2.0. See `man snprintf'.
155 va_end(args);
156 if (l + 1 > s->m - s->l) {
157 if (ks_resize(s, s->l + l + 2) < 0)
158 return -1;
159 va_copy(args, ap);
160 l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args);
161 va_end(args);
163 s->l += l;
164 return l;
167 int ksprintf(kstring_t *s, const char *fmt, ...)
169 va_list ap;
170 int l;
171 va_start(ap, fmt);
172 l = kvsprintf(s, fmt, ap);
173 va_end(ap);
174 return l;
177 char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux)
179 const char *p, *start;
180 if (sep) { // set up the table
181 if (str == 0 && (aux->tab[0]&1)) return 0; // no need to set up if we have finished
182 aux->finished = 0;
183 if (sep[1]) {
184 aux->sep = -1;
185 aux->tab[0] = aux->tab[1] = aux->tab[2] = aux->tab[3] = 0;
186 for (p = sep; *p; ++p) aux->tab[*p>>6] |= 1ull<<(*p&0x3f);
187 } else aux->sep = sep[0];
189 if (aux->finished) return 0;
190 else if (str) aux->p = str - 1, aux->finished = 0;
191 if (aux->sep < 0) {
192 for (p = start = aux->p + 1; *p; ++p)
193 if (aux->tab[*p>>6]>>(*p&0x3f)&1) break;
194 } else {
195 for (p = start = aux->p + 1; *p; ++p)
196 if (*p == aux->sep) break;
198 aux->p = p; // end of token
199 if (*p == 0) aux->finished = 1; // no more tokens
200 return (char*)start;
203 // s MUST BE a null terminated string; l = strlen(s)
204 int ksplit_core(char *s, int delimiter, int *_max, int **_offsets)
206 int i, n, max, last_char, last_start, *offsets, l;
207 n = 0; max = *_max; offsets = *_offsets;
208 l = strlen(s);
210 #define __ksplit_aux do { \
211 if (_offsets) { \
212 s[i] = 0; \
213 if (n == max) { \
214 int *tmp; \
215 max = max? max<<1 : 2; \
216 if ((tmp = (int*)realloc(offsets, sizeof(int) * max))) { \
217 offsets = tmp; \
218 } else { \
219 free(offsets); \
220 *_offsets = NULL; \
221 return 0; \
224 offsets[n++] = last_start; \
225 } else ++n; \
226 } while (0)
228 for (i = 0, last_char = last_start = 0; i <= l; ++i) {
229 if (delimiter == 0) {
230 if (isspace(s[i]) || s[i] == 0) {
231 if (isgraph(last_char)) __ksplit_aux; // the end of a field
232 } else {
233 if (isspace(last_char) || last_char == 0) last_start = i;
235 } else {
236 if (s[i] == delimiter || s[i] == 0) {
237 if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field
238 } else {
239 if (last_char == delimiter || last_char == 0) last_start = i;
242 last_char = s[i];
244 *_max = max; *_offsets = offsets;
245 return n;
248 int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp)
250 size_t l0 = s->l;
252 while (s->l == l0 || s->s[s->l-1] != '\n') {
253 if (s->m - s->l < 200) {
254 if (ks_resize(s, s->m + 200) < 0)
255 return EOF;
257 if (fgets_fn(s->s + s->l, s->m - s->l, fp) == NULL) break;
258 s->l += strlen(s->s + s->l);
261 if (s->l == l0) return EOF;
263 if (s->l > l0 && s->s[s->l-1] == '\n') {
264 s->l--;
265 if (s->l > l0 && s->s[s->l-1] == '\r') s->l--;
267 s->s[s->l] = '\0';
268 return 0;
271 /**********************
272 * Boyer-Moore search *
273 **********************/
275 typedef unsigned char ubyte_t;
277 // reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html
278 static int *ksBM_prep(const ubyte_t *pat, int m)
280 int i, *suff, *prep, *bmGs, *bmBc;
281 prep = (int*)calloc(m + 256, sizeof(int));
282 bmGs = prep; bmBc = prep + m;
283 { // preBmBc()
284 for (i = 0; i < 256; ++i) bmBc[i] = m;
285 for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1;
287 suff = (int*)calloc(m, sizeof(int));
288 { // suffixes()
289 int f = 0, g;
290 suff[m - 1] = m;
291 g = m - 1;
292 for (i = m - 2; i >= 0; --i) {
293 if (i > g && suff[i + m - 1 - f] < i - g)
294 suff[i] = suff[i + m - 1 - f];
295 else {
296 if (i < g) g = i;
297 f = i;
298 while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g;
299 suff[i] = f - g;
303 { // preBmGs()
304 int j = 0;
305 for (i = 0; i < m; ++i) bmGs[i] = m;
306 for (i = m - 1; i >= 0; --i)
307 if (suff[i] == i + 1)
308 for (; j < m - 1 - i; ++j)
309 if (bmGs[j] == m)
310 bmGs[j] = m - 1 - i;
311 for (i = 0; i <= m - 2; ++i)
312 bmGs[m - 1 - suff[i]] = m - 1 - i;
314 free(suff);
315 return prep;
318 void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep)
320 int i, j, *prep = 0, *bmGs, *bmBc;
321 const ubyte_t *str, *pat;
322 str = (const ubyte_t*)_str; pat = (const ubyte_t*)_pat;
323 prep = (_prep == 0 || *_prep == 0)? ksBM_prep(pat, m) : *_prep;
324 if (_prep && *_prep == 0) *_prep = prep;
325 bmGs = prep; bmBc = prep + m;
326 j = 0;
327 while (j <= n - m) {
328 for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i);
329 if (i >= 0) {
330 int max = bmBc[str[i+j]] - m + 1 + i;
331 if (max < bmGs[i]) max = bmGs[i];
332 j += max;
333 } else return (void*)(str + j);
335 if (_prep == 0) free(prep);
336 return 0;
339 char *kstrstr(const char *str, const char *pat, int **_prep)
341 return (char*)kmemmem(str, strlen(str), pat, strlen(pat), _prep);
344 char *kstrnstr(const char *str, const char *pat, int n, int **_prep)
346 return (char*)kmemmem(str, n, pat, strlen(pat), _prep);
349 /***********************
350 * The main() function *
351 ***********************/
353 #ifdef KSTRING_MAIN
354 #include <stdio.h>
355 int main()
357 kstring_t *s;
358 int *fields, n, i;
359 ks_tokaux_t aux;
360 char *p;
361 s = (kstring_t*)calloc(1, sizeof(kstring_t));
362 // test ksprintf()
363 ksprintf(s, " abcdefg: %d ", 100);
364 printf("'%s'\n", s->s);
365 // test ksplit()
366 fields = ksplit(s, 0, &n);
367 for (i = 0; i < n; ++i)
368 printf("field[%d] = '%s'\n", i, s->s + fields[i]);
369 // test kstrtok()
370 s->l = 0;
371 for (p = kstrtok("ab:cde:fg/hij::k", ":/", &aux); p; p = kstrtok(0, 0, &aux)) {
372 kputsn(p, aux.p - p, s);
373 kputc('\n', s);
375 printf("%s", s->s);
376 // free
377 free(s->s); free(s); free(fields);
380 static char *str = "abcdefgcdgcagtcakcdcd";
381 static char *pat = "cd";
382 char *ret, *s = str;
383 int *prep = 0;
384 while ((ret = kstrstr(s, pat, &prep)) != 0) {
385 printf("match: %s\n", ret);
386 s = ret + prep[0];
388 free(prep);
390 return 0;
392 #endif