8 int kvsprintf(kstring_t
*s
, const char *fmt
, va_list ap
)
13 l
= vsnprintf(s
->s
+ s
->l
, s
->m
- s
->l
, fmt
, args
); // This line does not work with glibc 2.0. See `man snprintf'.
15 if (l
+ 1 > s
->m
- s
->l
) {
18 s
->s
= (char*)realloc(s
->s
, s
->m
);
20 l
= vsnprintf(s
->s
+ s
->l
, s
->m
- s
->l
, fmt
, args
);
27 int ksprintf(kstring_t
*s
, const char *fmt
, ...)
32 l
= kvsprintf(s
, fmt
, ap
);
37 char *kstrtok(const char *str
, const char *sep_in
, ks_tokaux_t
*aux
)
39 const unsigned char *p
, *start
, *sep
= (unsigned char *) sep_in
;
40 if (sep
) { // set up the table
41 if (str
== 0 && aux
->finished
) return 0; // no need to set up if we have finished
43 if (sep
[0] && sep
[1]) {
45 aux
->tab
[0] = aux
->tab
[1] = aux
->tab
[2] = aux
->tab
[3] = 0;
46 for (p
= sep
; *p
; ++p
) aux
->tab
[*p
>>6] |= 1ull<<(*p
&0x3f);
47 } else aux
->sep
= sep
[0];
49 if (aux
->finished
) return 0;
50 else if (str
) start
= (unsigned char *) str
, aux
->finished
= 0;
51 else start
= (unsigned char *) aux
->p
+ 1;
53 for (p
= start
; *p
; ++p
)
54 if (aux
->tab
[*p
>>6]>>(*p
&0x3f)&1) break;
56 for (p
= start
; *p
; ++p
)
57 if (*p
== aux
->sep
) break;
59 aux
->p
= (const char *) p
; // end of token
60 if (*p
== 0) aux
->finished
= 1; // no more tokens
64 // s MUST BE a null terminated string; l = strlen(s)
65 int ksplit_core(char *s
, int delimiter
, int *_max
, int **_offsets
)
67 int i
, n
, max
, last_char
, last_start
, *offsets
, l
;
68 n
= 0; max
= *_max
; offsets
= *_offsets
;
71 #define __ksplit_aux do { \
76 max = max? max<<1 : 2; \
77 if ((tmp = (int*)realloc(offsets, sizeof(int) * max))) { \
85 offsets[n++] = last_start; \
89 for (i
= 0, last_char
= last_start
= 0; i
<= l
; ++i
) {
91 if (isspace(s
[i
]) || s
[i
] == 0) {
92 if (isgraph(last_char
)) __ksplit_aux
; // the end of a field
94 if (isspace(last_char
) || last_char
== 0) last_start
= i
;
97 if (s
[i
] == delimiter
|| s
[i
] == 0) {
98 if (last_char
!= 0 && last_char
!= delimiter
) __ksplit_aux
; // the end of a field
100 if (last_char
== delimiter
|| last_char
== 0) last_start
= i
;
105 *_max
= max
; *_offsets
= offsets
;
109 int kgetline(kstring_t
*s
, kgets_func
*fgets_fn
, void *fp
)
113 while (s
->l
== l0
|| s
->s
[s
->l
-1] != '\n') {
114 if (s
->m
- s
->l
< 200) ks_resize(s
, s
->m
+ 200);
115 if (fgets_fn(s
->s
+ s
->l
, s
->m
- s
->l
, fp
) == NULL
) break;
116 s
->l
+= strlen(s
->s
+ s
->l
);
119 if (s
->l
== l0
) return EOF
;
121 if (s
->l
> l0
&& s
->s
[s
->l
-1] == '\n') {
123 if (s
->l
> l0
&& s
->s
[s
->l
-1] == '\r') s
->l
--;
129 /**********************
130 * Boyer-Moore search *
131 **********************/
133 typedef unsigned char ubyte_t
;
135 // reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html
136 static int *ksBM_prep(const ubyte_t
*pat
, int m
)
138 int i
, *suff
, *prep
, *bmGs
, *bmBc
;
139 prep
= (int*)calloc(m
+ 256, sizeof(int));
140 bmGs
= prep
; bmBc
= prep
+ m
;
142 for (i
= 0; i
< 256; ++i
) bmBc
[i
] = m
;
143 for (i
= 0; i
< m
- 1; ++i
) bmBc
[pat
[i
]] = m
- i
- 1;
145 suff
= (int*)calloc(m
, sizeof(int));
150 for (i
= m
- 2; i
>= 0; --i
) {
151 if (i
> g
&& suff
[i
+ m
- 1 - f
] < i
- g
)
152 suff
[i
] = suff
[i
+ m
- 1 - f
];
156 while (g
>= 0 && pat
[g
] == pat
[g
+ m
- 1 - f
]) --g
;
163 for (i
= 0; i
< m
; ++i
) bmGs
[i
] = m
;
164 for (i
= m
- 1; i
>= 0; --i
)
165 if (suff
[i
] == i
+ 1)
166 for (; j
< m
- 1 - i
; ++j
)
169 for (i
= 0; i
<= m
- 2; ++i
)
170 bmGs
[m
- 1 - suff
[i
]] = m
- 1 - i
;
176 void *kmemmem(const void *_str
, int n
, const void *_pat
, int m
, int **_prep
)
178 int i
, j
, *prep
= 0, *bmGs
, *bmBc
;
179 const ubyte_t
*str
, *pat
;
180 str
= (const ubyte_t
*)_str
; pat
= (const ubyte_t
*)_pat
;
181 prep
= (_prep
== 0 || *_prep
== 0)? ksBM_prep(pat
, m
) : *_prep
;
182 if (_prep
&& *_prep
== 0) *_prep
= prep
;
183 bmGs
= prep
; bmBc
= prep
+ m
;
186 for (i
= m
- 1; i
>= 0 && pat
[i
] == str
[i
+j
]; --i
);
188 int max
= bmBc
[str
[i
+j
]] - m
+ 1 + i
;
189 if (max
< bmGs
[i
]) max
= bmGs
[i
];
191 } else return (void*)(str
+ j
);
193 if (_prep
== 0) free(prep
);
197 char *kstrstr(const char *str
, const char *pat
, int **_prep
)
199 return (char*)kmemmem(str
, strlen(str
), pat
, strlen(pat
), _prep
);
202 char *kstrnstr(const char *str
, const char *pat
, int n
, int **_prep
)
204 return (char*)kmemmem(str
, n
, pat
, strlen(pat
), _prep
);
207 /***********************
208 * The main() function *
209 ***********************/
219 s
= (kstring_t
*)calloc(1, sizeof(kstring_t
));
221 ksprintf(s
, " abcdefg: %d ", 100);
222 printf("'%s'\n", s
->s
);
224 fields
= ksplit(s
, 0, &n
);
225 for (i
= 0; i
< n
; ++i
)
226 printf("field[%d] = '%s'\n", i
, s
->s
+ fields
[i
]);
229 for (p
= kstrtok("ab:cde:fg/hij::k", ":/", &aux
); p
; p
= kstrtok(0, 0, &aux
)) {
230 kputsn(p
, aux
.p
- p
, s
);
235 free(s
->s
); free(s
); free(fields
);
238 static char *str
= "abcdefgcdgcagtcakcdcd";
239 static char *pat
= "cd";
242 while ((ret
= kstrstr(s
, pat
, &prep
)) != 0) {
243 printf("match: %s\n", ret
);