modified: src1/input.c
[GalaxyCodeBases.git] / c_cpp / lib / htslib / textutils.c
blob54ac9c8106c14df30be6e9886ca9b6009a6de680
1 /* textutils.c -- non-bioinformatics utility routines for text etc.
3 Copyright (C) 2016 Genome Research Ltd.
5 Author: John Marshall <jm18@sanger.ac.uk>
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
25 #include <config.h>
27 #include <stdio.h>
28 #include <string.h>
30 #include "htslib/hfile.h"
31 #include "htslib/kstring.h"
33 #include "hts_internal.h"
/* Converts one hexadecimal digit character (either case) to its value 0..15.
   Returns -1 for any other character, so in particular dehex('\0') is -1. */
static int dehex(char c)
{
    if ('0' <= c && c <= '9') return c - '0';
    if ('A' <= c && c <= 'F') return 10 + (c - 'A');
    if ('a' <= c && c <= 'f') return 10 + (c - 'a');
    return -1;
}
/* Decodes %XX escapes in the NUL-terminated string s, writing the result to
   dest followed by a terminating NUL.  A '%' that is not followed by two
   hexadecimal digits is copied through unchanged.

   On return *destlen holds the number of bytes written, not counting the
   terminating NUL.  Always returns 0. */
int hts_decode_percent(char *dest, size_t *destlen, const char *s)
{
    char *out = dest;

    while (*s != '\0') {
        if (*s == '%') {
            // dehex('\0') is -1, so s[2] is only examined when s[1] is a
            // hex digit and hence not the end of the string.
            int hi = dehex(s[1]);
            int lo = (hi >= 0)? dehex(s[2]) : -1;
            if (lo >= 0) {
                *out++ = (hi << 4) | lo;
                s += 3;
                continue;
            }
        }

        *out++ = *s++;
    }

    *out = '\0';
    *destlen = out - dest;
    return 0;
}
/* Converts one character of the base64 alphabet (A-Z, a-z, 0-9, '+', '/')
   to its 6-bit value.  Returns -1 for any other character, so in particular
   debase64('\0') is -1. */
static int debase64(char c)
{
    if ('A' <= c && c <= 'Z') return c - 'A';
    if ('a' <= c && c <= 'z') return 26 + (c - 'a');
    if ('0' <= c && c <= '9') return 52 + (c - '0');

    switch (c) {
    case '+': return 62;
    case '/': return 63;
    default:  return -1;
    }
}
/* Returns three bytes for each group of four input characters in len, where
   a trailing partial group of two or three characters counts as a whole
   group and a single leftover character counts as none. */
size_t hts_base64_decoded_length(size_t len)
{
    return 3 * ((len + 2) / 4);
}
/* Decodes base64 text from s into dest.  Decoding stops at the first
   character that is not in the base64 alphabet, which includes '=' padding
   and the terminating NUL.  No NUL terminator is written to dest.

   On return *destlen holds the number of bytes written.  Always returns 0. */
int hts_decode_base64(char *dest, size_t *destlen, const char *s)
{
    char *out = dest;
    int q[4];
    int ndigits;

    for (;;) {
        // Read up to four digits, stopping just after the first invalid one
        for (ndigits = 0; ndigits < 4; ndigits++) {
            q[ndigits] = debase64(*s++);
            if (q[ndigits] < 0) break;
        }
        if (ndigits < 4) break;

        *out++ = (q[0] << 2) | (q[1] >> 4);
        *out++ = (q[1] << 4) | (q[2] >> 2);
        *out++ = (q[2] << 6) | q[3];
    }

    // A trailing partial group of two or three digits carries one or two
    // further bytes; a lone trailing digit carries none.
    if (ndigits >= 2) *out++ = (q[0] << 2) | (q[1] >> 4);
    if (ndigits >= 3) *out++ = (q[1] << 4) | (q[2] >> 2);

    *destlen = out - dest;
    return 0;
}
/* Writes the UTF-8 encoding of code point x at s, using one byte below 0x80,
   two below 0x800, three below 0x10000 and four otherwise.  Returns a
   pointer just past the last byte written; no NUL terminator is added. */
static char *encode_utf8(char *s, unsigned x)
{
    unsigned lead;
    int ntrail;

    if (x < 0x80) {
        *s++ = x;
        return s;
    }

    if (x < 0x800)        { lead = 0xC0; ntrail = 1; }
    else if (x < 0x10000) { lead = 0xE0; ntrail = 2; }
    else                  { lead = 0xF0; ntrail = 3; }

    // Lead byte takes the bits above the continuation bytes; each
    // continuation byte then carries six bits, most significant first.
    *s++ = lead | (x >> (6 * ntrail));
    while (ntrail-- > 0)
        *s++ = 0x80 | ((x >> (6 * ntrail)) & 0x3F);

    return s;
}
/* Decodes, in place, the body of a JSON string starting at s (the character
   after the opening quote).  Backslash escapes are translated and the
   decoded text is NUL-terminated; decoding never writes ahead of the read
   position because every escape is at least as long as its replacement.

   Each \uXXXX escape is converted to UTF-8 on its own (escapes are not
   combined with one another).  Any other escaped character that is not one
   of b, f, n, r, t is copied through literally.  A \u that is NOT followed
   by four hexadecimal digits is handled the same way: the 'u' is copied and
   scanning resumes after it.  (Previously such input left the read position
   unchanged, so the scanner spun forever on the same backslash.)

   Returns a pointer to the character after the closing quote, or to the
   terminating NUL if the input ends before a closing quote is found. */
static char *sscan_string(char *s)
{
    char *d = s;
    int d1, d2, d3, d4;

    for (;;) switch (*s) {
    case '\\':
        switch (s[1]) {
        case '\0': *d = '\0'; return s+1;
        case 'b': *d++ = '\b'; s += 2; break;
        case 'f': *d++ = '\f'; s += 2; break;
        case 'n': *d++ = '\n'; s += 2; break;
        case 'r': *d++ = '\r'; s += 2; break;
        case 't': *d++ = '\t'; s += 2; break;
        default:  *d++ = s[1]; s += 2; break;
        case 'u':
            // dehex('\0') is -1, so the && chain stops before reading past
            // the end of the input.
            if ((d1 = dehex(s[2])) >= 0 && (d2 = dehex(s[3])) >= 0 &&
                (d3 = dehex(s[4])) >= 0 && (d4 = dehex(s[5])) >= 0) {
                d = encode_utf8(d, d1 << 12 | d2 << 8 | d3 << 4 | d4);
                s += 6;
            }
            else {
                // Malformed escape: must still make progress, so treat it
                // like any other unrecognised escape character.
                *d++ = 'u';
                s += 2;
            }
            break;
        }
        break;

    case '"':
        *d = '\0';
        return s+1;

    case '\0':
        *d = '\0';
        return s;

    default:
        *d++ = *s++;
        break;
    }
}
162 static void fscan_string(hFILE *fp, kstring_t *d)
164 int c, d1, d2, d3, d4;
166 while ((c = hgetc(fp)) != EOF) switch (c) {
167 case '\\':
168 if ((c = hgetc(fp)) == EOF) return;
169 switch (c) {
170 case 'b': kputc('\b', d); break;
171 case 'f': kputc('\f', d); break;
172 case 'n': kputc('\n', d); break;
173 case 'r': kputc('\r', d); break;
174 case 't': kputc('\t', d); break;
175 default: kputc(c, d); break;
176 case 'u':
177 if ((c = hgetc(fp)) != EOF && (d1 = dehex(c)) >= 0 &&
178 (c = hgetc(fp)) != EOF && (d2 = dehex(c)) >= 0 &&
179 (c = hgetc(fp)) != EOF && (d3 = dehex(c)) >= 0 &&
180 (c = hgetc(fp)) != EOF && (d4 = dehex(c)) >= 0) {
181 char buf[8];
182 char *lim = encode_utf8(buf, d1 << 12 | d2 << 8 | d3 << 4 | d4);
183 kputsn(buf, lim - buf, d);
185 break;
187 break;
189 case '"':
190 return;
192 default:
193 kputc(c, d);
194 break;
198 static char token_type(hts_json_token *token)
200 const char *s = token->str;
202 switch (*s) {
203 case 'f':
204 return (strcmp(s, "false") == 0)? 'b' : '?';
205 case 'n':
206 return (strcmp(s, "null") == 0)? '.' : '?';
207 case 't':
208 return (strcmp(s, "true") == 0)? 'b' : '?';
209 case '-':
210 case '0': case '1': case '2': case '3': case '4':
211 case '5': case '6': case '7': case '8': case '9':
212 return 'n';
213 default:
214 return '?';
218 char hts_json_snext(char *str, size_t *state, hts_json_token *token)
220 char *s = &str[*state >> 2];
221 int hidden = *state & 3;
223 if (hidden) {
224 *state &= ~3;
225 return token->type = "?}]?"[hidden];
228 #define STATE(s,h) (((s) - str) << 2 | (h))
230 for (;;) switch (*s) {
231 case ' ':
232 case '\t':
233 case '\r':
234 case '\n':
235 case ',':
236 case ':':
237 s++;
238 continue;
240 case '\0':
241 return token->type = '\0';
243 case '{':
244 case '[':
245 case '}':
246 case ']':
247 *state = STATE(s+1, 0);
248 return token->type = *s;
250 case '"':
251 token->str = s+1;
252 *state = STATE(sscan_string(s+1), 0);
253 return token->type = 's';
255 default:
256 token->str = s;
257 s += strcspn(s, " \t\r\n,]}");
258 hidden = (*s == '}')? 1 : (*s == ']')? 2 : 0;
259 if (*s != '\0') *s++ = '\0';
260 *state = STATE(s, hidden);
261 return token->type = token_type(token);
264 #undef STATE
267 char hts_json_fnext(struct hFILE *fp, hts_json_token *token, kstring_t *kstr)
269 char peek;
270 int c;
272 for (;;) switch (c = hgetc(fp)) {
273 case ' ':
274 case '\t':
275 case '\r':
276 case '\n':
277 case ',':
278 case ':':
279 continue;
281 case EOF:
282 return token->type = '\0';
284 case '{':
285 case '[':
286 case '}':
287 case ']':
288 return token->type = c;
290 case '"':
291 kstr->l = 0;
292 fscan_string(fp, kstr);
293 if (kstr->l == 0) kputsn("", 0, kstr);
294 token->str = kstr->s;
295 return token->type = 's';
297 default:
298 kstr->l = 0;
299 kputc(c, kstr);
300 while (hpeek(fp, &peek, 1) == 1 && !strchr(" \t\r\n,]}", peek)) {
301 if ((c = hgetc(fp)) == EOF) break;
302 kputc(c, kstr);
304 token->str = kstr->s;
305 return token->type = token_type(token);
310 typedef char hts_json_nextfn(void *arg1, void *arg2, hts_json_token *token);
312 static char skip_value(char type, hts_json_nextfn *next, void *arg1, void *arg2)
314 hts_json_token token;
315 int level;
317 switch (type? type : next(arg1, arg2, &token)) {
318 case '\0':
319 return '\0';
321 case '?':
322 case '}':
323 case ']':
324 return '?';
326 case '{':
327 case '[':
328 level = 1;
329 break;
331 default:
332 return 'v';
335 while (level > 0)
336 switch (next(arg1, arg2, &token)) {
337 case '\0':
338 return '\0';
340 case '?':
341 return '?';
343 case '{':
344 case '[':
345 level++;
346 break;
348 case '}':
349 case ']':
350 --level;
351 break;
353 default:
354 break;
357 return 'v';
360 static char snext(void *arg1, void *arg2, hts_json_token *token)
362 return hts_json_snext(arg1, arg2, token);
364 char hts_json_sskip_value(char *str, size_t *state, char type)
366 return skip_value(type, snext, str, state);
369 static char fnext(void *arg1, void *arg2, hts_json_token *token)
371 return hts_json_fnext(arg1, token, arg2);
373 char hts_json_fskip_value(struct hFILE *fp, char type)
375 kstring_t str = { 0, 0, NULL };
376 char ret = skip_value(type, fnext, fp, &str);
377 free(str.s);
378 return ret;