modified: diffout.py
[GalaxyCodeBases.git] / c_cpp / lib / htslib / test / sam.c
blob736281d7d49d27a755c7b6407116dcd2e17fa2bb
1 /* test/sam.c -- SAM/BAM/CRAM API test cases.
3 Copyright (C) 2014-2017 Genome Research Ltd.
5 Author: John Marshall <jm18@sanger.ac.uk>
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
25 #include <config.h>
27 #include <stdarg.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdint.h>
32 #include <inttypes.h>
33 #include <math.h>
35 // Suppress message for faidx_fetch_nseq(), which we're intentionally testing
36 #include "htslib/hts_defs.h"
37 #undef HTS_DEPRECATED
38 #define HTS_DEPRECATED(message)
40 #include "htslib/sam.h"
41 #include "htslib/faidx.h"
42 #include "htslib/kstring.h"
44 int status;
46 static void HTS_FORMAT(printf, 1, 2) fail(const char *fmt, ...)
48 va_list args;
50 fprintf(stderr, "Failed: ");
51 va_start(args, fmt);
52 vfprintf(stderr, fmt, args);
53 va_end(args);
54 fprintf(stderr, "\n");
56 status = EXIT_FAILURE;
59 uint8_t *check_bam_aux_get(const bam1_t *aln, const char *tag, char type)
61 uint8_t *p = bam_aux_get(aln, tag);
62 if (p) {
63 if (*p == type) return p;
64 else fail("%s field of type '%c', expected '%c'\n", tag, *p, type);
66 else fail("can't find %s field\n", tag);
68 return NULL;
71 static void check_int_B_array(bam1_t *aln, char *tag,
72 uint32_t nvals, int64_t *vals) {
73 uint8_t *p;
74 if ((p = check_bam_aux_get(aln, tag, 'B')) != NULL) {
75 uint32_t i;
77 if (bam_auxB_len(p) != nvals)
78 fail("Wrong length reported for %s field, got %u, expected %u\n",
79 tag, bam_auxB_len(p), nvals);
81 for (i = 0; i < nvals; i++) {
82 if (bam_auxB2i(p, i) != vals[i]) {
83 fail("Wrong value from bam_auxB2i for %s field index %u, "
84 "got %"PRId64" expected %"PRId64"\n",
85 tag, i, bam_auxB2i(p, i), vals[i]);
87 if (bam_auxB2f(p, i) != (double) vals[i]) {
88 fail("Wrong value from bam_auxB2f for %s field index %u, "
89 "got %f expected %f\n",
90 tag, i, bam_auxB2f(p, i), (double) vals[i]);
96 #define PI 3.141592653589793
97 #define E 2.718281828459045
98 #define HELLO "Hello, world!"
99 #define NEW_HELLO "Yo, dude"
100 #define BEEF "DEADBEEF"
102 #define str(x) #x
103 #define xstr(x) str(x)
105 static int aux_fields1(void)
107 static const char sam[] = "data:,"
108 "@SQ\tSN:one\tLN:1000\n"
109 "@SQ\tSN:two\tLN:500\n"
110 "r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXA:A:k\tXi:i:37\tXf:f:" xstr(PI) "\tXd:d:" xstr(E) "\tXZ:Z:" HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,+2\tB0:B:i,-2147483648,-1,0,1,2147483647\tB1:B:I,0,1,2147483648,4294967295\tB2:B:s,-32768,-1,0,1,32767\tB3:B:S,0,1,32768,65535\tB4:B:c,-128,-1,0,1,127\tB5:B:C,0,1,127,255\tBf:B:f,-3.14159,2.71828\tZZ:i:1000000\tY1:i:-2147483648\tY2:i:-2147483647\tY3:i:-1\tY4:i:0\tY5:i:1\tY6:i:2147483647\tY7:i:2147483648\tY8:i:4294967295\n";
112 // Canonical form of the alignment record above, as output by sam_format1()
113 static const char r1[] = "r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXi:i:37\tXf:f:3.14159\tXd:d:2.71828\tXZ:Z:" NEW_HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,2\tB0:B:i,-2147483648,-1,0,1,2147483647\tB1:B:I,0,1,2147483648,4294967295\tB2:B:s,-32768,-1,0,1,32767\tB3:B:S,0,1,32768,65535\tB4:B:c,-128,-1,0,1,127\tB5:B:C,0,1,127,255\tBf:B:f,-3.14159,2.71828\tZZ:i:1000000\tY1:i:-2147483648\tY2:i:-2147483647\tY3:i:-1\tY4:i:0\tY5:i:1\tY6:i:2147483647\tY7:i:2147483648\tY8:i:4294967295\tN0:i:-1234\tN1:i:1234";
115 samFile *in = sam_open(sam, "r");
116 bam_hdr_t *header = sam_hdr_read(in);
117 bam1_t *aln = bam_init1();
118 uint8_t *p;
119 kstring_t ks = { 0, 0, NULL };
120 int64_t b0vals[5] = { -2147483648LL,-1,0,1,2147483647LL }; // i
121 int64_t b1vals[4] = { 0,1,2147483648LL,4294967295LL }; // I
122 int64_t b2vals[5] = { -32768,-1,0,1,32767 }; // s
123 int64_t b3vals[4] = { 0,1,32768,65535 }; // S
124 int64_t b4vals[5] = { -128,-1,0,1,127 }; // c
125 int64_t b5vals[4] = { 0,1,127,255 }; // C
126 // NB: Floats not doubles below!
127 // See https://randomascii.wordpress.com/2012/06/26/doubles-are-not-floats-so-dont-compare-them/
128 float bfvals[2] = { -3.14159f, 2.71828f };
130 int32_t ival = -1234;
131 uint32_t uval = 1234;
133 size_t nvals, i;
135 if (sam_read1(in, header, aln) >= 0) {
136 if ((p = check_bam_aux_get(aln, "XA", 'A')) && bam_aux2A(p) != 'k')
137 fail("XA field is '%c', expected 'k'", bam_aux2A(p));
139 bam_aux_del(aln,p);
140 if (bam_aux_get(aln,"XA"))
141 fail("XA field was not deleted");
143 if ((p = check_bam_aux_get(aln, "Xi", 'C')) && bam_aux2i(p) != 37)
144 fail("Xi field is %"PRId64", expected 37", bam_aux2i(p));
146 if ((p = check_bam_aux_get(aln, "Xf", 'f')) && fabs(bam_aux2f(p) - PI) > 1E-6)
147 fail("Xf field is %.12f, expected pi", bam_aux2f(p));
149 if ((p = check_bam_aux_get(aln, "Xd", 'd')) && fabs(bam_aux2f(p) - E) > 1E-6)
150 fail("Xf field is %.12f, expected e", bam_aux2f(p));
152 if ((p = check_bam_aux_get(aln, "XZ", 'Z')) && strcmp(bam_aux2Z(p), HELLO) != 0)
153 fail("XZ field is \"%s\", expected \"%s\"", bam_aux2Z(p), HELLO);
155 bam_aux_update_str(aln,"XZ",strlen(NEW_HELLO)+1,NEW_HELLO);
156 if ((p = check_bam_aux_get(aln, "XZ", 'Z')) && strcmp(bam_aux2Z(p), NEW_HELLO) != 0)
157 fail("XZ field is \"%s\", expected \"%s\"", bam_aux2Z(p), NEW_HELLO);
160 if ((p = check_bam_aux_get(aln, "XH", 'H')) && strcmp(bam_aux2Z(p), BEEF) != 0)
161 fail("XH field is \"%s\", expected \"%s\"", bam_aux2Z(p), BEEF);
163 if ((p = check_bam_aux_get(aln, "XB", 'B'))
164 && ! (memcmp(p, "Bc", 2) == 0
165 && memcmp(p + 2, "\x03\x00\x00\x00\xfe\x00\x02", 7) == 0))
166 fail("XB field is %c,..., expected c,-2,0,+2", p[1]);
168 check_int_B_array(aln, "B0",
169 sizeof(b0vals) / sizeof(b0vals[0]), b0vals);
170 check_int_B_array(aln, "B1",
171 sizeof(b1vals) / sizeof(b1vals[0]), b1vals);
172 check_int_B_array(aln, "B2",
173 sizeof(b2vals) / sizeof(b2vals[0]), b2vals);
174 check_int_B_array(aln, "B3",
175 sizeof(b3vals) / sizeof(b3vals[0]), b3vals);
176 check_int_B_array(aln, "B4",
177 sizeof(b4vals) / sizeof(b4vals[0]), b4vals);
178 check_int_B_array(aln, "B5",
179 sizeof(b5vals) / sizeof(b5vals[0]), b5vals);
181 nvals = sizeof(bfvals) / sizeof(bfvals[0]);
182 if ((p = check_bam_aux_get(aln, "Bf", 'B')) != NULL) {
183 if (bam_auxB_len(p) != nvals)
184 fail("Wrong length reported for Bf field, got %d, expected %zd\n",
185 bam_auxB_len(p), nvals);
187 for (i = 0; i < nvals; i++) {
188 if (bam_auxB2f(p, i) != bfvals[i]) {
189 fail("Wrong value from bam_auxB2f for Bf field index %zd, "
190 "got %f expected %f\n",
191 i, bam_auxB2f(p, i), bfvals[i]);
196 if ((p = check_bam_aux_get(aln, "ZZ", 'I')) && bam_aux2i(p) != 1000000)
197 fail("ZZ field is %"PRId64", expected 1000000", bam_aux2i(p));
199 if ((p = bam_aux_get(aln, "Y1")) && bam_aux2i(p) != -2147483647-1)
200 fail("Y1 field is %"PRId64", expected -2^31", bam_aux2i(p));
202 if ((p = bam_aux_get(aln, "Y2")) && bam_aux2i(p) != -2147483647)
203 fail("Y2 field is %"PRId64", expected -2^31+1", bam_aux2i(p));
205 if ((p = bam_aux_get(aln, "Y3")) && bam_aux2i(p) != -1)
206 fail("Y3 field is %"PRId64", expected -1", bam_aux2i(p));
208 if ((p = bam_aux_get(aln, "Y4")) && bam_aux2i(p) != 0)
209 fail("Y4 field is %"PRId64", expected 0", bam_aux2i(p));
211 if ((p = bam_aux_get(aln, "Y5")) && bam_aux2i(p) != 1)
212 fail("Y5 field is %"PRId64", expected 1", bam_aux2i(p));
214 if ((p = bam_aux_get(aln, "Y6")) && bam_aux2i(p) != 2147483647)
215 fail("Y6 field is %"PRId64", expected 2^31-1", bam_aux2i(p));
217 if ((p = bam_aux_get(aln, "Y7")) && bam_aux2i(p) != 2147483648LL)
218 fail("Y7 field is %"PRId64", expected 2^31", bam_aux2i(p));
220 if ((p = bam_aux_get(aln, "Y8")) && bam_aux2i(p) != 4294967295LL)
221 fail("Y8 field is %"PRId64", expected 2^32-1", bam_aux2i(p));
223 // Try appending some new tags
224 if (bam_aux_append(aln, "N0", 'i', sizeof(ival), (uint8_t *) &ival) != 0)
225 fail("Failed to append N0:i tag");
227 if ((p = bam_aux_get(aln, "N0")) && bam_aux2i(p) != ival)
228 fail("N0 field is %"PRId64", expected %d", bam_aux2i(p), ival);
230 if (bam_aux_append(aln, "N1", 'I', sizeof(uval), (uint8_t *) &uval) != 0)
231 fail("failed to append N1:I tag");
233 if ((p = bam_aux_get(aln, "N1")) && bam_aux2i(p) != uval)
234 fail("N1 field is %"PRId64", expected %u", bam_aux2i(p), uval);
236 if (sam_format1(header, aln, &ks) < 0)
237 fail("can't format record");
239 if (strcmp(ks.s, r1) != 0)
240 fail("record formatted incorrectly: \"%s\"", ks.s);
242 free(ks.s);
244 else fail("can't read record");
246 bam_destroy1(aln);
247 bam_hdr_destroy(header);
248 sam_close(in);
250 return 1;
253 static void iterators1(void)
255 hts_itr_destroy(sam_itr_queryi(NULL, HTS_IDX_REST, 0, 0));
256 hts_itr_destroy(sam_itr_queryi(NULL, HTS_IDX_NONE, 0, 0));
259 static void copy_check_alignment(const char *infname, const char *informat,
260 const char *outfname, const char *outmode, const char *outref)
262 samFile *in = sam_open(infname, "r");
263 samFile *out = sam_open(outfname, outmode);
264 bam1_t *aln = bam_init1();
265 bam_hdr_t *header;
267 if (outref) {
268 if (hts_set_opt(out, CRAM_OPT_REFERENCE, outref) < 0)
269 fail("setting reference %s for %s", outref, outfname);
272 header = sam_hdr_read(in);
273 if (sam_hdr_write(out, header) < 0) fail("writing headers to %s", outfname);
275 while (sam_read1(in, header, aln) >= 0) {
276 int mod4 = ((intptr_t) bam_get_cigar(aln)) % 4;
277 if (mod4 != 0)
278 fail("%s CIGAR not 4-byte aligned; offset is 4k+%d for \"%s\"",
279 informat, mod4, bam_get_qname(aln));
281 if (sam_write1(out, header, aln) < 0) fail("writing to %s", outfname);
284 bam_destroy1(aln);
285 bam_hdr_destroy(header);
286 sam_close(in);
287 sam_close(out);
290 static void samrecord_layout(void)
292 static const char qnames[] = "data:,"
293 "@SQ\tSN:CHROMOSOME_II\tLN:5000\n"
294 "a\t0\tCHROMOSOME_II\t100\t10\t4M\t*\t0\t0\tATGC\tqqqq\n"
295 "bc\t0\tCHROMOSOME_II\t200\t10\t4M\t*\t0\t0\tATGC\tqqqq\n"
296 "def\t0\tCHROMOSOME_II\t300\t10\t4M\t*\t0\t0\tATGC\tqqqq\n"
297 "ghij\t0\tCHROMOSOME_II\t400\t10\t4M\t*\t0\t0\tATGC\tqqqq\n"
298 "klmno\t0\tCHROMOSOME_II\t500\t10\t4M\t*\t0\t0\tATGC\tqqqq\n";
300 size_t bam1_t_size, bam1_t_size2;
302 bam1_t_size = 36 + sizeof (int) + 4 + sizeof (char *);
303 #ifndef BAM_NO_ID
304 bam1_t_size += 8;
305 #endif
306 bam1_t_size2 = bam1_t_size + 4; // Account for padding on some platforms
308 if (sizeof (bam1_core_t) != 36)
309 fail("sizeof bam1_core_t is %zu, expected 36", sizeof (bam1_core_t));
311 if (sizeof (bam1_t) != bam1_t_size && sizeof (bam1_t) != bam1_t_size2)
312 fail("sizeof bam1_t is %zu, expected either %zu or %zu",
313 sizeof(bam1_t), bam1_t_size, bam1_t_size2);
315 copy_check_alignment(qnames, "SAM",
316 "test/sam_alignment.tmp.bam", "wb", NULL);
317 copy_check_alignment("test/sam_alignment.tmp.bam", "BAM",
318 "test/sam_alignment.tmp.cram", "wc", "test/ce.fa");
319 copy_check_alignment("test/sam_alignment.tmp.cram", "CRAM",
320 "test/sam_alignment.tmp.sam_", "w", NULL);
323 static void faidx1(const char *filename)
325 int n, n_exp = 0;
326 char tmpfilename[FILENAME_MAX], line[500];
327 FILE *fin, *fout;
328 faidx_t *fai;
330 fin = fopen(filename, "r");
331 if (fin == NULL) fail("can't open %s\n", filename);
332 sprintf(tmpfilename, "%s.tmp", filename);
333 fout = fopen(tmpfilename, "w");
334 if (fout == NULL) fail("can't create temporary %s\n", tmpfilename);
335 while (fgets(line, sizeof line, fin)) {
336 if (line[0] == '>') n_exp++;
337 fputs(line, fout);
339 fclose(fin);
340 fclose(fout);
342 if (fai_build(tmpfilename) < 0) fail("can't index %s", tmpfilename);
343 fai = fai_load(tmpfilename);
344 if (fai == NULL) fail("can't load faidx file %s", tmpfilename);
346 n = faidx_fetch_nseq(fai);
347 if (n != n_exp)
348 fail("%s: faidx_fetch_nseq returned %d, expected %d", filename, n, n_exp);
350 n = faidx_nseq(fai);
351 if (n != n_exp)
352 fail("%s: faidx_nseq returned %d, expected %d", filename, n, n_exp);
354 fai_destroy(fai);
357 static void check_enum1(void)
359 // bgzf_compression() returns int, but enjoys this correspondence
360 if (no_compression != 0) fail("no_compression is %d", no_compression);
361 if (gzip != 1) fail("gzip is %d", gzip);
362 if (bgzf != 2) fail("bgzf is %d", bgzf);
365 int main(int argc, char **argv)
367 int i;
369 status = EXIT_SUCCESS;
371 aux_fields1();
372 iterators1();
373 samrecord_layout();
374 check_enum1();
375 for (i = 1; i < argc; i++) faidx1(argv[i]);
377 return status;