Merge pull request #378 from taoliu/fix_setup_script_364
[MACS.git] / MACS2 / OutputWriter.py
blob340c917a7ca96afd02247d06bc18ed5139c479af
1 # Time-stamp: <2019-09-20 11:59:32 taoliu>
3 """Module Description: Functions to write file
5 This code is free software; you can redistribute it and/or modify it
6 under the terms of the BSD License (see the file LICENSE included with
7 the distribution).
8 """
10 # ------------------------------------
11 # python modules
12 # ------------------------------------
13 import os
14 import sys
15 from array import array
16 from MACS2.Constants import *
18 # ------------------------------------
19 # constants
20 # ------------------------------------
21 # to determine the byte size
22 if array('h',[1]).itemsize == 2:
23 BYTE2 = 'h'
24 else:
25 raise Exception("BYTE2 type cannot be determined!")
26 if array('i',[1]).itemsize == 4:
27 BYTE4 = 'i'
28 elif array('l',[1]).itemsize == 4:
29 BYTE4 = 'l'
30 else:
31 raise Exception("BYTE4 type cannot be determined!")
33 if array('f',[1]).itemsize == 4:
34 FBYTE4 = 'f'
35 elif array('d',[1]).itemsize == 4:
36 FBYTE4 = 'd'
37 else:
38 raise Exception("FBYTE4 type cannot be determined!")
40 # ------------------------------------
41 # Misc functions
42 # ------------------------------------
43 def zwig_write (trackI, subdir, fileprefix, d, log=None,space=10, single=False):
44 """Write shifted tags information in wiggle file in a given
45 step. Then compress it using 'gzip' program.
47 trackI: shifted tags from PeakDetect object
48 subdir: directory where to put the wiggle file
49 fileprefix: wiggle file prefix
50 d : d length
51 log : logging function, default is sys.stderr.write
52 space : space to write tag number on spots, default 10
53 """
54 if not log:
55 log = lambda x: sys.stderr.write(x+"\n")
56 chrs = trackI.get_chr_names()
57 os.makedirs (subdir)
58 step = 10000000 + 2*d
60 if single:
61 log("write to a wiggle file")
62 f = os.path.join(subdir,fileprefix+"_all"+".wig")
63 wigfhd = open(f,"w")
64 wigfhd.write("track type=wiggle_0 name=\"%s_all\" description=\"Extended tag pileup from MACS version %s for every %d bp\"\n" % (fileprefix.replace('_afterfiting',''), MACS_VERSION, space)) # data type line
66 for chrom in chrs:
67 if not single:
68 f = os.path.join(subdir,fileprefix+"_"+chrom+".wig")
69 log("write to "+f+" for chromosome "+chrom)
70 wigfhd = open(f,"w")
71 # suggested by dawe
72 wigfhd.write("track type=wiggle_0 name=\"%s_%s\" description=\"Extended tag pileup from MACS version %s for every %d bp\"\n" % ( fileprefix.replace('_afterfiting',''), chrom, MACS_VERSION, space)) # data type line
73 else:
74 log("write data for chromosome "+chrom)
76 wigfhd.write("variableStep chrom=%s span=%d\n" % (chrom,space))
77 tags = trackI.get_locations_by_chr(chrom)[0]
78 l = len(tags)
79 window_counts = array(BYTE4,[0]*step)
80 startp = -1*d
81 endp = startp+step
82 index_tag = 0
84 while index_tag<l:
85 s = tags[index_tag]-d/2 # start of tag
86 e = s+d # end of tag
88 if e < endp:
89 # project tag to window_counts line
90 ps = s-startp # projection start
91 pe = ps+d # projection end
92 for i in range(ps,pe):
93 window_counts[i] += 1
94 index_tag += 1
95 else:
96 # write it to zwig file then reset parameters
97 # keep this tag for next window
98 for i in range(d,step-d,space):
99 if window_counts[i] == 0:
100 pass
101 else:
102 wigfhd.write("%d\t%d\n" % (i+startp+1,window_counts[i]))
103 # reset
104 window_counts_next = array(BYTE4,[0]*step)
105 # copy d values from the tail of previous window to next window
106 for n,i in enumerate(range(step-2*d,step)): # debug
107 window_counts_next[n] = window_counts[i]
108 window_counts = window_counts_next
109 startp = endp - 2*d
110 endp = startp+step
111 # last window
112 for i in range(d,step-d,space):
113 if window_counts[i] == 0:
114 pass
115 else:
116 wigfhd.write("%d\t%d\n" % (i+startp+1,window_counts[i]))
117 if not single:
118 wigfhd.close()
119 log("compress the wiggle file using gzip...")
120 os.system("gzip "+f)
121 if single:
122 wigfhd.close()
123 log("compress the wiggle file using gzip...")
124 os.system("gzip "+f)
127 def zbdg_write (trackI, subdir, fileprefix, d, log=None, single=False):
128 """Write shifted tags information in wiggle file in a given
129 step. Then compress it using 'gzip' program.
131 trackI: shifted tags from PeakDetect object
132 subdir: directory where to put the wiggle file
133 fileprefix: wiggle file prefix
134 d : d length
135 log : logging function, default is sys.stderr.write
136 space : space to write tag number on spots, default 10
138 if not log:
139 log = lambda x: sys.stderr.write(x+"\n")
140 chrs = trackI.get_chr_names()
141 os.makedirs (subdir)
142 step = 10000000 + 2*d
144 if single:
145 log("write to a bedGraph file")
146 f = os.path.join(subdir,fileprefix+"_all"+".bdg")
147 bdgfhd = open(f,"w")
148 bdgfhd.write("track type=bedGraph name=\"%s_all\" description=\"Extended tag pileup from MACS version %s\"\n" % (fileprefix.replace('_afterfiting',''), MACS_VERSION)) # data type line
150 for chrom in chrs:
151 if not single:
152 f = os.path.join(subdir,fileprefix+"_"+chrom+".bdg")
153 log("write to "+f+" for chromosome "+chrom)
154 bdgfhd = open(f,"w")
155 bdgfhd.write("track type=bedGraph name=\"%s_%s\" description=\"Extended tag pileup from MACS version %s\"\n" % (fileprefix.replace('_afterfiting',''), chrom, MACS_VERSION)) # data type line
156 else:
157 log("write data for chromosome "+chrom)
159 tags = trackI.get_locations_by_chr(chrom)[0]
160 l = len(tags)
161 window_counts = array(BYTE4,[0]*step)
162 startp = -1*d
163 endp = startp+step
164 index_tag = 0
166 while index_tag<l:
167 s = tags[index_tag]-d/2 # start of tag
168 e = s+d # end of tag
170 if e < endp:
171 # project tag to window_counts line
172 ps = s-startp # projection start
173 pe = ps+d # projection end
174 for i in range(ps,pe):
175 window_counts[i] += 1
176 index_tag += 1
177 else:
178 # write it to zbdg file then reset parameters
179 # keep this tag for next window
180 prev = window_counts[d]
181 left = startp+d
182 right = left+1
183 for i in range(d+1,step-d):
184 if window_counts[i] == prev:
185 # same value, extend
186 right += 1
187 else:
188 # diff value, close
189 if prev != 0:
190 bdgfhd.write("%s\t%d\t%d\t%d\n" % (chrom,left,right,prev))
191 prev = window_counts[i]
192 left = right
193 right = left + 1
194 # last bin
195 if prev != 0:
196 bdgfhd.write("%s\t%d\t%d\t%d\n" % (chrom,left,right,prev))
198 # reset
199 window_counts_next = array(BYTE4,[0]*step)
200 # copy d values from the tail of previous window to next window
201 for n,i in enumerate(range(step-2*d,step)): # debug
202 window_counts_next[n] = window_counts[i]
203 window_counts = window_counts_next
204 startp = endp - 2*d
205 endp = startp+step
206 # last window
207 prev = window_counts[d]
208 left = startp+d
209 right = left+1
210 for i in range(d+1,step-d):
211 if window_counts[i] == prev:
212 # same value, exrend
213 right += 1
214 else:
215 # diff value, close
216 if prev != 0:
217 bdgfhd.write("%s\t%d\t%d\t%d\n" % (chrom,left,right,prev))
218 prev = window_counts[i]
219 left = right
220 right = left + 1
221 # last bin
222 if prev != 0:
223 bdgfhd.write("%s\t%d\t%d\t%d\n" % (chrom,left,right,prev))
225 if not single:
226 bdgfhd.close()
227 log("compress the bedGraph file using gzip...")
228 os.system("gzip "+f)
229 if single:
230 bdgfhd.close()
231 log("compress the bedGraph file using gzip...")
232 os.system("gzip "+f)
235 def model2r_script(model,filename,name):
236 rfhd = open(filename,"w")
237 p = model.plus_line
238 m = model.minus_line
239 ycorr = model.ycorr
240 xcorr = model.xcorr
241 alt_d = model.alternative_d
242 #s = model.shifted_line
243 d = model.d
244 w = len(p)
245 norm_p = [0]*w
246 norm_m = [0]*w
247 #norm_s = [0]*w
248 sum_p = sum(p)
249 sum_m = sum(m)
250 #sum_s = sum(s)
251 for i in range(w):
252 norm_p[i] = float(p[i])*100/sum_p
253 norm_m[i] = float(m[i])*100/sum_m
254 #norm_s[i] = float(s[i])*100/sum_s
255 rfhd.write("# R script for Peak Model\n")
256 rfhd.write("# -- generated by MACS\n")
258 rfhd.write("""p <- c(%s)
259 m <- c(%s)
260 ycorr <- c(%s)
261 xcorr <- c(%s)
262 altd <- c(%s)
263 x <- seq.int((length(p)-1)/2*-1,(length(p)-1)/2)
264 pdf('%s_model.pdf',height=6,width=6)
265 plot(x,p,type='l',col=c('red'),main='Peak Model',xlab='Distance to the middle',ylab='Percentage')
266 lines(x,m,col=c('blue'))
267 legend('topleft',c('forward tags','reverse tags'),lty=c(1,1,1),col=c('red','blue'))
268 plot(xcorr,ycorr,type='l',col=c('black'),main='Cross-Correlation',xlab='Lag between + and - tags',ylab='Correlation')
269 abline(v=altd,lty=2,col=c('red'))
270 legend('topleft','alternative lag(s)',lty=2,col='red')
271 legend('right','alt lag(s) : %s',bty='n')
272 dev.off()
273 """ % (','.join(map(str,norm_p)),
274 ','.join(map(str,norm_m)),
275 ','.join(map(str,ycorr)),
276 ','.join(map(str,xcorr)),
277 ', '.join(map(str,alt_d)),
278 name,
279 ','.join(map(str,alt_d))
281 rfhd.close()
283 def diag_write (filename, diag_result):
284 ofhd_diag = open(filename,"w")
285 a = diag_result[0]
286 l = len(a)-2
287 s = [90-x*10 for x in range(l)]
288 ofhd_diag.write("FC range\t# of Peaks\tcover by sampling %s\n" % ("%\t".join (map(str,s))+"%"))
289 format = "%s\t%d"+"\t%.2f"*l+"\n"
290 ofhd_diag.write( "".join( [format % tuple(x) for x in diag_result]) )
291 ofhd_diag.close()
293 # ------------------------------------
294 # Classes
295 # ------------------------------------