modified: myjupyterlab.sh
[GalaxyCodeBases.git] / tools / lh3misc / seq / bc2rg.lua
blobeb5b0fafcdeefdeb3431450605934072059f608a
1 #!/usr/bin/env luajit
-- Description: string split
-- Split the string on the Lua pattern `sep`.
--   sep : Lua pattern used as the field separator (default "%s+", i.e. runs
--         of whitespace). It is a pattern, not a plain string, so magic
--         characters must be escaped by the caller.
--   n   : optional limit; once `n` fields have been cut off, the untouched
--         remainder of the string is appended as one final field.
-- Returns an array of the fields. When the string ends exactly on a
-- separator (and the `n` limit was not hit there), no trailing empty field
-- is appended.
function string:split(sep, n)
	local a, start = {}, 1;
	sep = sep or "%s+";
	repeat
		local b, e = self:find(sep, start);
		-- Stop when no separator is left. Also stop on an empty match
		-- (e < b): it would leave `start` unchanged and loop forever, so the
		-- remainder is kept as the last field instead.
		if b == nil or e < b then
			a[#a+1] = self:sub(start);
			break
		end
		a[#a+1] = self:sub(start, b - 1);
		start = e + 1;
		if n and #a == n then
			-- limit reached: keep everything that is left as a single field
			a[#a+1] = self:sub(start);
			break
		end
	until start > #self;
	return a;
end
-- Description: intelligent file open
-- Open `fn`, transparently piping through gzip/bzip2 when the name ends in
-- ".gz" or ".bz2".
--   fn   : file name. nil returns io.stdin; "-" returns io.stdin when
--          reading and io.stdout otherwise.
--   mode : open mode (default 'r'). Any mode starting with 'r' is treated as
--          reading; everything else as writing.
-- Returns a file handle. On failure it returns whatever io.open / io.popen
-- return (for io.open: nil plus an error message).
function io.xopen(fn, mode)
	mode = mode or 'r';
	local reading = (mode:sub(1, 1) == 'r');
	if fn == nil then return io.stdin; end
	if fn == '-' then
		if reading then return io.stdin; end
		return io.stdout;
	end

	-- pick the (de)compressor from the file extension
	local prog;
	if fn:sub(-3) == '.gz' then
		prog = 'gzip';
	elseif fn:sub(-4) == '.bz2' then
		prog = 'bzip2';
	end
	if prog == nil then return io.open(fn, mode); end

	-- Single-quote the name for the POSIX shell so that spaces and shell
	-- metacharacters in `fn` are passed literally instead of being
	-- interpreted. Parentheses drop gsub's second return value.
	local quoted = "'" .. (fn:gsub("'", "'\\''")) .. "'";

	-- Explicit branches rather than `a and b or c`: a failed read pipe must
	-- never fall through to the write pipe, which would truncate `fn`.
	if reading then
		return io.popen(prog .. ' -dc ' .. quoted, 'r');
	end
	return io.popen(prog .. ' > ' .. quoted, 'w');
end
-- All three positional arguments are required: the barcode list, the input
-- SAM file and the library name that is embedded in every read-group ID.
if #arg < 3 then
	-- Write the usage to stderr: stdout carries the SAM output and may be
	-- piped into another program.
	io.stderr:write("Usage: bc2rg.lua <barcode.list> <in.sam> <libname>\n");
	os.exit(1);
end
-- read barcode.list
-- Each usable line starts with "<barcode> <sample>" separated by whitespace;
-- lines that do not have two such fields are skipped.
local hash = {};   -- barcode sequence (upper case) -> read-group ID
local hdr = {};    -- @RG header lines, in barcode.list order
local nuc = {'A', 'C', 'G', 'T'};
local exact = {};  -- set of barcodes exactly as listed (upper case)
local fp = assert(io.xopen(arg[1]));  -- fail with the open error, not on nil
for line in fp:lines() do
	local bc, sam = line:match("^(%S+)%s+(%S+)");
	if bc ~= nil then
		local rg = sam..'-'..arg[3]..'-'..bc;
		-- The SAM pass upper-cases the BC tag before looking it up, so the
		-- keys must be upper case as well.
		local key = bc:upper();
		hash[key] = rg;
		exact[key] = true;
		hdr[#hdr+1] = "@RG\tID:"..rg.."\tSM:"..sam.."\tLB:"..rg;
		-- Also register every sequence one substitution away from the
		-- barcode so that reads with one mismatch are still assigned.
		for i = 1, #key do
			local cur = key:sub(i, i);
			for j = 1, #nuc do
				if nuc[j] ~= cur then
					local b = key:sub(1, i-1) .. nuc[j] .. key:sub(i+1);
					-- never let a mismatch variant replace a barcode that
					-- was listed exactly
					if not exact[b] then hash[b] = rg; end
				end
			end
		end
	end
end
-- catch-all read group for reads whose barcode is missing or unknown
hdr[#hdr+1] = "@RG\tID:N/A\tSM:N/A\tLB:N/A";
fp:close();
-- process the SAM file
-- Existing header lines (starting with '@') are passed through; the @RG
-- lines built from the barcode list are printed right after them. Every
-- other line gets an RG:Z: field appended, chosen by its BC:Z: tag.
fp = assert(io.xopen(arg[2]));  -- fail with the open error, not on nil
local first = true;  -- true until the @RG lines have been printed
for l in fp:lines() do
	if l:sub(1,1) == '@' then
		print(l);
	else
		if first then
			print(table.concat(hdr, "\n"));
			first = false;
		end
		-- The BC tag may be absent: only upper-case and look it up when the
		-- match succeeded, otherwise fall back to the N/A read group.
		local bc = l:match("BC:Z:(%S+)");
		local rg = bc and hash[bc:upper()];
		if rg == nil then
			print(l, "RG:Z:N/A");  -- print() joins its arguments with a tab
		else
			print(l, "RG:Z:" .. rg);
		end
	end
end
-- input had header lines only (or was empty): still emit the @RG lines
if first then
	print(table.concat(hdr, "\n"));
end
fp:close();