1 /* textutils.c -- non-bioinformatics utility routines for text etc.
3 Copyright (C) 2016 Genome Research Ltd.
5 Author: John Marshall <jm18@sanger.ac.uk>
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
30 #include "htslib/hfile.h"
31 #include "htslib/kstring.h"
33 #include "hts_internal.h"
/* Convert a single hexadecimal digit character to its numeric value.
 *
 * c: the character to classify.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for every other
 * character.  In particular dehex('\0') is -1, so a caller that tests
 * consecutive characters with short-circuiting && stops at the string
 * terminator without needing a separate end-of-string check.
 */
static int dehex(char c)
{
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    else if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    else if (c >= '0' && c <= '9') return c - '0';
    else return -1;  // Hence dehex('\0') = -1
}
/* hts_decode_percent -- percent-escape decoding (only part of the body is
   visible in this listing).

   Visible logic: when *s is '%' and both of the following two characters are
   hexadecimal digits according to dehex(), the pair is combined as
   (hi << 4) | lo and stored as one byte through the write cursor d.
   Because dehex('\0') is -1 and && short-circuits, s[2] is not examined when
   s[1] is not a hex digit, so the test does not read past a terminator that
   directly follows the '%'.

   NOTE(review): the enclosing loop, the handling of characters that are not
   part of a valid escape, how dest/destlen are used and the return value are
   not visible here -- confirm against the complete source. */
43 int hts_decode_percent(char *dest
, size_t *destlen
, const char *s
)
49 if (*s
== '%' && (hi
= dehex(s
[1])) >= 0 && (lo
= dehex(s
[2])) >= 0) {
50 *d
++ = (hi
<< 4) | lo
;
/* Map one character of the standard base64 alphabet to its 6-bit value.
 *
 * c: the character to classify.
 *
 * Returns 0..25 for 'A'-'Z', 26..51 for 'a'-'z', 52..61 for '0'-'9',
 * 62 for '+' and 63 for '/'.  Every other character yields -1; this
 * includes the '=' padding character and '\0', so callers can use a
 * negative result as their stop condition.
 */
static int debase64(char c)
{
    if (c >= 'a' && c <= 'z') return c - 'a' + 26;
    else if (c >= 'A' && c <= 'Z') return c - 'A';
    else if (c >= '0' && c <= '9') return c - '0' + 52;
    else if (c == '/') return 63;
    else if (c == '+') return 62;
    else return -1;  // Hence debase64('\0') = -1
}
/* hts_base64_decoded_length -- derive a decoded size from an encoded length
   (the return statement is not visible in this listing).

   nquartets is (len + 2) / 4 using integer division: a trailing group of two
   or three characters counts as one more quartet, while a single leftover
   character does not (len 1 -> 0, len 2..5 -> 1, len 6 -> 2).

   NOTE(review): how nquartets is turned into the returned byte count is not
   shown here -- confirm against the complete source. */
71 size_t hts_base64_decoded_length(size_t len
)
73 size_t nquartets
= (len
+ 2) / 4;
/* hts_decode_base64 -- base64 decoding (only part of the body is visible in
   this listing).

   Visible logic: x0..x3 hold the 6-bit values of up to four consecutive input
   characters.  Each of x1, x2, x3 is fetched with debase64(*s++) only if the
   preceding value was non-negative; otherwise it is set to -1 and s is not
   advanced.  Decoding of a group therefore stops consuming input at the first
   character debase64() rejects (which includes '=' and '\0').

   NOTE(review): the enclosing loop, how x0 is obtained, the conditions that
   select between the full and partial groups, the use of dest/destlen and the
   return value are not visible here -- confirm against the complete source. */
77 int hts_decode_base64(char *dest
, size_t *destlen
, const char *s
)
84 x1
= (x0
>= 0)? debase64(*s
++) : -1;
85 x2
= (x1
>= 0)? debase64(*s
++) : -1;
86 x3
= (x2
>= 0)? debase64(*s
++) : -1;
/* Full group: four 6-bit values are repacked into three output bytes. */
89 *d
++ = (x0
<< 2) | (x1
>> 4);
90 *d
++ = (x1
<< 4) | (x2
>> 2);
91 *d
++ = (x2
<< 6) | x3
;
/* Partial group: emit only the bytes for which a second (and third) valid
   character was actually read. */
94 if (x1
>= 0) *d
++ = (x0
<< 2) | (x1
>> 4);
95 if (x2
>= 0) *d
++ = (x1
<< 4) | (x2
>> 2);
/* encode_utf8 -- write the value x as a multi-byte sequence at s (only part
   of the body is visible in this listing).

   Visible logic: each branch stores a lead byte followed by continuation
   bytes of the form 0x80 | (six bits of x), advancing s after every byte:
     - lead 0xF0 | (x >> 18) followed by three continuation bytes;
     - x >= 0x800: lead 0xE0 | (x >> 12) followed by two continuation bytes;
     - x >= 0x80:  lead 0xC0 | (x >> 6) followed by one continuation byte.

   Callers in this file use the returned pointer as the position just past
   the bytes written (fscan_string() computes a length as lim - buf).

   NOTE(review): the condition guarding the 0xF0 branch, the handling of
   x < 0x80 and the return statement are not visible here -- confirm against
   the complete source. */
101 static char *encode_utf8(char *s
, unsigned x
)
104 *s
++ = 0xF0 | (x
>> 18);
105 *s
++ = 0x80 | ((x
>> 12) & 0x3F);
106 *s
++ = 0x80 | ((x
>> 6) & 0x3F);
107 *s
++ = 0x80 | (x
& 0x3F);
109 else if (x
>= 0x800) {
110 *s
++ = 0xE0 | (x
>> 12);
111 *s
++ = 0x80 | ((x
>> 6) & 0x3F);
112 *s
++ = 0x80 | (x
& 0x3F);
114 else if (x
>= 0x80) {
115 *s
++ = 0xC0 | (x
>> 6);
116 *s
++ = 0x80 | (x
& 0x3F);
/* sscan_string -- unescape a string in place (only part of the body is
   visible in this listing).

   Visible logic: an endless loop switches on *s and writes decoded output
   through the cursor d.  The visible cases all consume two input characters
   (s += 2) and the default copies s[1], so they presumably sit in an inner
   switch on the character following a backslash -- TODO confirm.
     - 'b', 'f', 'n', 'r', 't' produce the corresponding control character;
     - any other escaped character is copied unchanged;
     - a '\0' case terminates the output and returns s+1;
     - four hex digits at s[2]..s[5] (validated with dehex()) are combined
       into a 16-bit value and written via encode_utf8(), whose result
       becomes the new d.

   NOTE(review): the declaration of d, the outer cases (closing quote,
   ordinary characters, end of input) and what happens when the hex digits
   are invalid are not visible here -- confirm against the complete source. */
123 static char *sscan_string(char *s
)
128 for (;;) switch (*s
) {
131 case '\0': *d
= '\0'; return s
+1;
132 case 'b': *d
++ = '\b'; s
+= 2; break;
133 case 'f': *d
++ = '\f'; s
+= 2; break;
134 case 'n': *d
++ = '\n'; s
+= 2; break;
135 case 'r': *d
++ = '\r'; s
+= 2; break;
136 case 't': *d
++ = '\t'; s
+= 2; break;
137 default: *d
++ = s
[1]; s
+= 2; break;
/* Four-hex-digit escape: every digit must be valid before anything is
   written; dehex('\0') == -1 stops the checks at a premature terminator. */
139 if ((d1
= dehex(s
[2])) >= 0 && (d2
= dehex(s
[3])) >= 0 &&
140 (d3
= dehex(s
[4])) >= 0 && (d4
= dehex(s
[5])) >= 0) {
141 d
= encode_utf8(d
, d1
<< 12 | d2
<< 8 | d3
<< 4 | d4
);
/* fscan_string -- read and unescape a string from fp, appending the decoded
   characters to the kstring d (only part of the body is visible in this
   listing).

   Visible logic: characters are read with hgetc() until EOF and dispatched
   through a switch.  At one point a further character is read and the
   function returns immediately if that read hits EOF; the escape letters
   'b', 'f', 'n', 'r', 't' then append the matching control character and any
   other character is appended unchanged.  A four-hex-digit escape reads four
   more characters, each checked for EOF and validated with dehex(); the
   resulting 16-bit value is encoded into buf with encode_utf8() and the
   lim - buf bytes produced are appended with kputsn().

   NOTE(review): the case labels (backslash, closing quote, 'u'), the
   declaration of buf, and the behaviour when a hex escape is malformed are
   not visible here -- confirm against the complete source.  Return values of
   kputc()/kputsn() are not checked in the visible lines. */
162 static void fscan_string(hFILE
*fp
, kstring_t
*d
)
164 int c
, d1
, d2
, d3
, d4
;
166 while ((c
= hgetc(fp
)) != EOF
) switch (c
) {
168 if ((c
= hgetc(fp
)) == EOF
) return;
170 case 'b': kputc('\b', d
); break;
171 case 'f': kputc('\f', d
); break;
172 case 'n': kputc('\n', d
); break;
173 case 'r': kputc('\r', d
); break;
174 case 't': kputc('\t', d
); break;
175 default: kputc(c
, d
); break;
/* Each hex digit is read only if the previous read succeeded and decoded. */
177 if ((c
= hgetc(fp
)) != EOF
&& (d1
= dehex(c
)) >= 0 &&
178 (c
= hgetc(fp
)) != EOF
&& (d2
= dehex(c
)) >= 0 &&
179 (c
= hgetc(fp
)) != EOF
&& (d3
= dehex(c
)) >= 0 &&
180 (c
= hgetc(fp
)) != EOF
&& (d4
= dehex(c
)) >= 0) {
182 char *lim
= encode_utf8(buf
, d1
<< 12 | d2
<< 8 | d3
<< 4 | d4
);
183 kputsn(buf
, lim
- buf
, d
);
/* token_type -- classify an unquoted token by its text (only part of the
   body is visible in this listing).

   Visible logic, operating on s = token->str:
     - text equal to "false" or "true" yields 'b';
     - text equal to "null" yields '.';
     - in each of those branches any other text yields '?';
     - there is a group of case labels for the digits '0'..'9'.

   NOTE(review): the switch expression, the labels selecting each strcmp()
   branch, and the value returned for the digit cases and the default are not
   visible here -- confirm against the complete source. */
198 static char token_type(hts_json_token
*token
)
200 const char *s
= token
->str
;
204 return (strcmp(s
, "false") == 0)? 'b' : '?';
206 return (strcmp(s
, "null") == 0)? '.' : '?';
208 return (strcmp(s
, "true") == 0)? 'b' : '?';
210 case '0': case '1': case '2': case '3': case '4':
211 case '5': case '6': case '7': case '8': case '9':
/* hts_json_snext -- return the next token from the in-memory text str,
   modifying str in place (only part of the body is visible in this listing).

   *state packs two fields: the upper bits (*state >> 2) are the offset of the
   next unread character in str, and the low two bits hold "hidden", which
   remembers a delimiter that was overwritten with '\0' when the previous
   token was terminated (1 for '}', 2 for ']', 0 for none).  The STATE()
   macro rebuilds this packed value from a pointer into str and a hidden
   code.

   Visible return paths, each also stored in token->type:
     - a pending hidden delimiter is reported via "?}]?"[hidden];
     - '\0' is returned on one path (its case label is not visible);
     - a single character is returned as its own type, advancing by one;
     - 's' after sscan_string() has processed the text starting at s+1;
     - for other tokens, the text runs up to the first of " \t\r\n,]}" and
       the result of token_type() is returned.

   NOTE(review): the case labels, where token->str is assigned, and whether
   hidden is cleared after being reported are not visible here -- confirm
   against the complete source. */
218 char hts_json_snext(char *str
, size_t *state
, hts_json_token
*token
)
220 char *s
= &str
[*state
>> 2];
221 int hidden
= *state
& 3;
225 return token
->type
= "?}]?"[hidden
];
228 #define STATE(s,h) (((s) - str) << 2 | (h))
230 for (;;) switch (*s
) {
241 return token
->type
= '\0';
247 *state
= STATE(s
+1, 0);
248 return token
->type
= *s
;
252 *state
= STATE(sscan_string(s
+1), 0);
253 return token
->type
= 's';
/* Bare token: find its end, remember a closing brace/bracket that is about
   to be overwritten, then NUL-terminate the token in place (unless already
   at the end of the text). */
257 s
+= strcspn(s
, " \t\r\n,]}");
258 hidden
= (*s
== '}')? 1 : (*s
== ']')? 2 : 0;
259 if (*s
!= '\0') *s
++ = '\0';
260 *state
= STATE(s
, hidden
);
261 return token
->type
= token_type(token
);
/* hts_json_fnext -- return the next token read from the hFILE fp, using kstr
   as storage for token text (only part of the body is visible in this
   listing).

   Visible return paths, each also stored in token->type:
     - '\0' on one path (its case label is not visible);
     - the character just read, returned as its own type;
     - 's' after fscan_string() has filled kstr; token->str then points at
       kstr->s;
     - for other tokens, the result of token_type() with token->str pointing
       at kstr->s.

   NOTE(review): the case labels, the declarations of c and peek, where kstr
   is reset between tokens and how characters are appended in the bare-token
   loop are not visible here -- confirm against the complete source. */
267 char hts_json_fnext(struct hFILE
*fp
, hts_json_token
*token
, kstring_t
*kstr
)
272 for (;;) switch (c
= hgetc(fp
)) {
282 return token
->type
= '\0';
288 return token
->type
= c
;
292 fscan_string(fp
, kstr
);
/* Zero-length append for an empty string; presumably this makes kputsn()
   allocate so that kstr->s is a valid "" rather than NULL -- TODO confirm. */
293 if (kstr
->l
== 0) kputsn("", 0, kstr
);
294 token
->str
= kstr
->s
;
295 return token
->type
= 's';
/* Look one byte ahead with hpeek() and stop before a delimiter so that the
   delimiter itself is left unread for the next call. */
300 while (hpeek(fp
, &peek
, 1) == 1 && !strchr(" \t\r\n,]}", peek
)) {
301 if ((c
= hgetc(fp
)) == EOF
) break;
304 token
->str
= kstr
->s
;
305 return token
->type
= token_type(token
);
310 typedef char hts_json_nextfn(void *arg1
, void *arg2
, hts_json_token
*token
);
/* skip_value -- skip over a value using the supplied token callback (only
   the two switch heads are visible in this listing).

   type: if non-zero it is used as the type of a token the caller has already
         read; if zero, the first token is fetched with next(arg1, arg2, ...).
   next, arg1, arg2: token callback and the two opaque arguments forwarded
         to it on every call.

   A second switch fetches further tokens through the same callback.

   NOTE(review): the cases of both switches, any recursion or nesting
   handling, and the return value are not visible here -- confirm against the
   complete source. */
312 static char skip_value(char type
, hts_json_nextfn
*next
, void *arg1
, void *arg2
)
314 hts_json_token token
;
317 switch (type
? type
: next(arg1
, arg2
, &token
)) {
336 switch (next(arg1
, arg2
, &token
)) {
360 static char snext(void *arg1
, void *arg2
, hts_json_token
*token
)
362 return hts_json_snext(arg1
, arg2
, token
);
364 char hts_json_sskip_value(char *str
, size_t *state
, char type
)
366 return skip_value(type
, snext
, str
, state
);
369 static char fnext(void *arg1
, void *arg2
, hts_json_token
*token
)
371 return hts_json_fnext(arg1
, token
, arg2
);
373 char hts_json_fskip_value(struct hFILE
*fp
, char type
)
375 kstring_t str
= { 0, 0, NULL
};
376 char ret
= skip_value(type
, fnext
, fp
, &str
);