3 Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include "htslib/kstring.h"
36 int kputd(double d
, kstring_t
*s
) {
38 char buf
[21], *cp
= buf
+20, *ep
;
54 if (!(d
>= 0.0001 && d
<= 999999)) {
55 if (ks_resize(s
, s
->l
+ 50) < 0)
57 // We let stdio handle the exponent cases
58 int s2
= sprintf(s
->s
+ s
->l
, "%g", d
);
64 uint64_t i
= d
*10000000000;
65 // Correction for rounding - rather ugly
67 // Optimised for small numbers.
68 // Better still would be __builtin_clz on hi/lo 32 and get the
69 // starting point very rapidly.
99 if (p
<= 10) { // d < 1
101 cp
[6] = 0; ep
= cp
+5;// 6 precision
117 if (cp
[6] == '.') cp
[6] = 0;
120 // Cull trailing zeros
121 while (*ep
== '0' && ep
> cp
)
141 int kvsprintf(kstring_t
*s
, const char *fmt
, va_list ap
)
147 if (fmt
[0] == '%' && fmt
[1] == 'g' && fmt
[2] == 0) {
148 double d
= va_arg(args
, double);
154 l
= vsnprintf(s
->s
+ s
->l
, s
->m
- s
->l
, fmt
, args
); // This line does not work with glibc 2.0. See `man snprintf'.
156 if (l
+ 1 > s
->m
- s
->l
) {
157 if (ks_resize(s
, s
->l
+ l
+ 2) < 0)
160 l
= vsnprintf(s
->s
+ s
->l
, s
->m
- s
->l
, fmt
, args
);
167 int ksprintf(kstring_t
*s
, const char *fmt
, ...)
172 l
= kvsprintf(s
, fmt
, ap
);
177 char *kstrtok(const char *str
, const char *sep
, ks_tokaux_t
*aux
)
179 const char *p
, *start
;
180 if (sep
) { // set up the table
181 if (str
== 0 && (aux
->tab
[0]&1)) return 0; // no need to set up if we have finished
185 aux
->tab
[0] = aux
->tab
[1] = aux
->tab
[2] = aux
->tab
[3] = 0;
186 for (p
= sep
; *p
; ++p
) aux
->tab
[*p
>>6] |= 1ull<<(*p
&0x3f);
187 } else aux
->sep
= sep
[0];
189 if (aux
->finished
) return 0;
190 else if (str
) aux
->p
= str
- 1, aux
->finished
= 0;
192 for (p
= start
= aux
->p
+ 1; *p
; ++p
)
193 if (aux
->tab
[*p
>>6]>>(*p
&0x3f)&1) break;
195 for (p
= start
= aux
->p
+ 1; *p
; ++p
)
196 if (*p
== aux
->sep
) break;
198 aux
->p
= p
; // end of token
199 if (*p
== 0) aux
->finished
= 1; // no more tokens
203 // s MUST BE a null terminated string; l = strlen(s)
204 int ksplit_core(char *s
, int delimiter
, int *_max
, int **_offsets
)
206 int i
, n
, max
, last_char
, last_start
, *offsets
, l
;
207 n
= 0; max
= *_max
; offsets
= *_offsets
;
210 #define __ksplit_aux do { \
215 max = max? max<<1 : 2; \
216 if ((tmp = (int*)realloc(offsets, sizeof(int) * max))) { \
224 offsets[n++] = last_start; \
228 for (i
= 0, last_char
= last_start
= 0; i
<= l
; ++i
) {
229 if (delimiter
== 0) {
230 if (isspace(s
[i
]) || s
[i
] == 0) {
231 if (isgraph(last_char
)) __ksplit_aux
; // the end of a field
233 if (isspace(last_char
) || last_char
== 0) last_start
= i
;
236 if (s
[i
] == delimiter
|| s
[i
] == 0) {
237 if (last_char
!= 0 && last_char
!= delimiter
) __ksplit_aux
; // the end of a field
239 if (last_char
== delimiter
|| last_char
== 0) last_start
= i
;
244 *_max
= max
; *_offsets
= offsets
;
248 int kgetline(kstring_t
*s
, kgets_func
*fgets_fn
, void *fp
)
252 while (s
->l
== l0
|| s
->s
[s
->l
-1] != '\n') {
253 if (s
->m
- s
->l
< 200) {
254 if (ks_resize(s
, s
->m
+ 200) < 0)
257 if (fgets_fn(s
->s
+ s
->l
, s
->m
- s
->l
, fp
) == NULL
) break;
258 s
->l
+= strlen(s
->s
+ s
->l
);
261 if (s
->l
== l0
) return EOF
;
263 if (s
->l
> l0
&& s
->s
[s
->l
-1] == '\n') {
265 if (s
->l
> l0
&& s
->s
[s
->l
-1] == '\r') s
->l
--;
271 /**********************
272 * Boyer-Moore search *
273 **********************/
275 typedef unsigned char ubyte_t
;
277 // reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html
278 static int *ksBM_prep(const ubyte_t
*pat
, int m
)
280 int i
, *suff
, *prep
, *bmGs
, *bmBc
;
281 prep
= (int*)calloc(m
+ 256, sizeof(int));
282 bmGs
= prep
; bmBc
= prep
+ m
;
284 for (i
= 0; i
< 256; ++i
) bmBc
[i
] = m
;
285 for (i
= 0; i
< m
- 1; ++i
) bmBc
[pat
[i
]] = m
- i
- 1;
287 suff
= (int*)calloc(m
, sizeof(int));
292 for (i
= m
- 2; i
>= 0; --i
) {
293 if (i
> g
&& suff
[i
+ m
- 1 - f
] < i
- g
)
294 suff
[i
] = suff
[i
+ m
- 1 - f
];
298 while (g
>= 0 && pat
[g
] == pat
[g
+ m
- 1 - f
]) --g
;
305 for (i
= 0; i
< m
; ++i
) bmGs
[i
] = m
;
306 for (i
= m
- 1; i
>= 0; --i
)
307 if (suff
[i
] == i
+ 1)
308 for (; j
< m
- 1 - i
; ++j
)
311 for (i
= 0; i
<= m
- 2; ++i
)
312 bmGs
[m
- 1 - suff
[i
]] = m
- 1 - i
;
318 void *kmemmem(const void *_str
, int n
, const void *_pat
, int m
, int **_prep
)
320 int i
, j
, *prep
= 0, *bmGs
, *bmBc
;
321 const ubyte_t
*str
, *pat
;
322 str
= (const ubyte_t
*)_str
; pat
= (const ubyte_t
*)_pat
;
323 prep
= (_prep
== 0 || *_prep
== 0)? ksBM_prep(pat
, m
) : *_prep
;
324 if (_prep
&& *_prep
== 0) *_prep
= prep
;
325 bmGs
= prep
; bmBc
= prep
+ m
;
328 for (i
= m
- 1; i
>= 0 && pat
[i
] == str
[i
+j
]; --i
);
330 int max
= bmBc
[str
[i
+j
]] - m
+ 1 + i
;
331 if (max
< bmGs
[i
]) max
= bmGs
[i
];
333 } else return (void*)(str
+ j
);
335 if (_prep
== 0) free(prep
);
339 char *kstrstr(const char *str
, const char *pat
, int **_prep
)
341 return (char*)kmemmem(str
, strlen(str
), pat
, strlen(pat
), _prep
);
344 char *kstrnstr(const char *str
, const char *pat
, int n
, int **_prep
)
346 return (char*)kmemmem(str
, n
, pat
, strlen(pat
), _prep
);
349 /***********************
350 * The main() function *
351 ***********************/
361 s
= (kstring_t
*)calloc(1, sizeof(kstring_t
));
363 ksprintf(s
, " abcdefg: %d ", 100);
364 printf("'%s'\n", s
->s
);
366 fields
= ksplit(s
, 0, &n
);
367 for (i
= 0; i
< n
; ++i
)
368 printf("field[%d] = '%s'\n", i
, s
->s
+ fields
[i
]);
371 for (p
= kstrtok("ab:cde:fg/hij::k", ":/", &aux
); p
; p
= kstrtok(0, 0, &aux
)) {
372 kputsn(p
, aux
.p
- p
, s
);
377 free(s
->s
); free(s
); free(fields
);
380 static char *str
= "abcdefgcdgcagtcakcdcd";
381 static char *pat
= "cd";
384 while ((ret
= kstrstr(s
, pat
, &prep
)) != 0) {
385 printf("match: %s\n", ret
);