modified: myjupyterlab.sh
[GalaxyCodeBases.git] / tools / lh3misc / seq / sam2bp.lua
blobd6f146b4037895cc92b01d456bff2917951de0a8
1 #!/usr/bin/env luajit
3 local bit = require("bit")
5 function io.xopen(fn, mode)
6 mode = mode or 'r';
7 if fn == nil then return io.stdin;
8 elseif fn == '-' then return (mode == 'r' and io.stdin) or io.stdout;
9 elseif fn:sub(-3) == '.gz' then return (mode == 'r' and io.popen('gzip -dc ' .. fn, 'r')) or io.popen('gzip > ' .. fn, 'w');
10 elseif fn:sub(-4) == '.bz2' then return (mode == 'r' and io.popen('bzip2 -dc ' .. fn, 'r')) or io.popen('bgzip2 > ' .. fn, 'w');
11 else return io.open(fn, mode) end
12 end
14 function string:split(sep, n)
15 local a, start = {}, 1;
16 sep = sep or "%s+";
17 repeat
18 local b, e = self:find(sep, start);
19 if b == nil then
20 table.insert(a, self:sub(start));
21 break
22 end
23 a[#a+1] = self:sub(start, b - 1);
24 start = e + 1;
25 if n and #a == n then
26 table.insert(a, self:sub(start));
27 break
28 end
29 until start > #self;
30 return a;
31 end
33 local comp_table = {}
34 comp_table[string.byte('A')] = 'T';
35 comp_table[string.byte('T')] = 'A';
36 comp_table[string.byte('C')] = 'G';
37 comp_table[string.byte('G')] = 'C';
39 local fp = io.xopen(arg[1])
40 local last, out1, out2, k
41 for l in fp:lines() do
42 if l:sub(1, 1) ~= '@' then
43 local t = l:split('\t', 12)
44 if t[10]:find('N') == nil then
45 if last ~= t[1] then
46 if out2 and k == 2 and out1[3] >= 25 then
47 print(table.concat(out1, "\t"), table.concat(out2, "\t"))
48 out2 = nil
49 end
50 k, last = 1, t[1]
51 else k = k + 1 end
52 if k == 1 then
53 out1 = {t[1], t[10], t[11], tonumber(t[2]), t[3], t[4], t[5], t[6]}
54 elseif k == 2 then
55 out2 = {t[2], t[3], t[4], t[5], t[6]}
56 if bit.band(out1[4], 16) ~= 0 then -- reverse complement
57 local x, s = {}, out1[2]:reverse()
58 for i = 1, #s do x[i] = comp_table[s:byte(i)] end
59 out1[2] = table.concat(x)
60 end
61 -- compute the average base quality
62 if out1[2]:find("[CGT]") == nil then out1[3] = 0 -- poly A
63 else
64 local x, s = 0, out1[3];
65 for i = 1, #s do x = x + s:byte(i) - 33 end
66 x = math.floor(x / #s + .499)
67 out1[3] = x
68 end
69 end -- do nothing if there are 3 or more hits
70 end
71 end
72 end