MACS2/OutputWriter.py

   1 # Time-stamp: <2019-09-20 11:59:32 taoliu>
   2
   3 """Module Description: Functions to write file
   4
   5 This code is free software; you can redistribute it and/or modify it
   6 under the terms of the BSD License (see the file LICENSE included with
   7 the distribution).
   8 """
   9
  10 # ------------------------------------
  11 # python modules
  12 # ------------------------------------
  13 import os
  14 import sys
  15 from array import array
  16 from MACS2.Constants import *
  17
  18 # ------------------------------------
  19 # constants
  20 # ------------------------------------
  21 # to determine the byte size
  22 if array('h',[1]).itemsize == 2:
  23     BYTE2 = 'h'
  24 else:
  25     raise Exception("BYTE2 type cannot be determined!")
  26 if array('i',[1]).itemsize == 4:
  27     BYTE4 = 'i'
  28 elif array('l',[1]).itemsize == 4:
  29     BYTE4 = 'l'
  30 else:
  31     raise Exception("BYTE4 type cannot be determined!")
  32
  33 if array('f',[1]).itemsize == 4:
  34     FBYTE4 = 'f'
  35 elif array('d',[1]).itemsize == 4:
  36     FBYTE4 = 'd'
  37 else:
  38     raise Exception("FBYTE4 type cannot be determined!")
  39
  40 # ------------------------------------
  41 # Misc functions
  42 # ------------------------------------
  43 def zwig_write (trackI, subdir, fileprefix, d, log=None,space=10, single=False):
  44     """Write shifted tags information in wiggle file in a given
  45     step. Then compress it using 'gzip' program.
  46
  47     trackI: shifted tags from PeakDetect object
  48     subdir: directory where to put the wiggle file
  49     fileprefix: wiggle file prefix
  50     d     : d length
  51     log   : logging function, default is sys.stderr.write
  52     space : space to write tag number on spots, default 10
  53     """
  54     if not log:
  55         log = lambda x: sys.stderr.write(x+"\n")
  56     chrs = trackI.get_chr_names()
  57     os.makedirs (subdir)
  58     step = 10000000 + 2*d
  59
  60     if single:
  61         log("write to a wiggle file")
  62         f = os.path.join(subdir,fileprefix+"_all"+".wig")
  63         wigfhd = open(f,"w")
  64         wigfhd.write("track type=wiggle_0 name=\"%s_all\" description=\"Extended tag pileup from MACS version %s for every %d bp\"\n" % (fileprefix.replace('_afterfiting',''), MACS_VERSION, space)) # data type line
  65
  66     for chrom in chrs:
  67         if not single:
  68             f = os.path.join(subdir,fileprefix+"_"+chrom+".wig")
  69             log("write to "+f+" for chromosome "+chrom)
  70             wigfhd = open(f,"w")
  71             # suggested by dawe
  72             wigfhd.write("track type=wiggle_0 name=\"%s_%s\" description=\"Extended tag pileup from MACS version %s for every %d bp\"\n" % ( fileprefix.replace('_afterfiting',''), chrom, MACS_VERSION, space)) # data type line
  73         else:
  74             log("write data for chromosome "+chrom)
  75
  76         wigfhd.write("variableStep chrom=%s span=%d\n" % (chrom,space))
  77         tags = trackI.get_locations_by_chr(chrom)[0]
  78         l = len(tags)
  79         window_counts = array(BYTE4,[0]*step)
  80         startp = -1*d
  81         endp   = startp+step
  82         index_tag = 0
  83
  84         while index_tag<l:
  85             s = tags[index_tag]-d/2     # start of tag
  86             e = s+d                     # end of tag
  87
  88             if e < endp:
  89                 # project tag to window_counts line
  90                 ps = s-startp # projection start
  91                 pe = ps+d     # projection end
  92                 for i in range(ps,pe):
  93                     window_counts[i] += 1
  94                 index_tag += 1
  95             else:
  96                 # write it to zwig file then reset parameters
  97                 # keep this tag for next window
  98                 for i in range(d,step-d,space):
  99                     if window_counts[i] == 0:
 100                         pass
 101                     else:
 102                         wigfhd.write("%d\t%d\n" % (i+startp+1,window_counts[i]))
 103                 # reset
 104                 window_counts_next = array(BYTE4,[0]*step)
 105                 # copy d values from the tail of previous window to next window
 106                 for n,i in enumerate(range(step-2*d,step)): # debug
 107                     window_counts_next[n] = window_counts[i]
 108                 window_counts = window_counts_next
 109                 startp = endp - 2*d
 110                 endp = startp+step
 111         # last window
 112         for i in range(d,step-d,space):
 113             if window_counts[i] == 0:
 114                 pass
 115             else:
 116                 wigfhd.write("%d\t%d\n" % (i+startp+1,window_counts[i]))
 117         if not single:
 118             wigfhd.close()
 119             log("compress the wiggle file using gzip...")
 120             os.system("gzip "+f)
 121     if single:
 122         wigfhd.close()
 123         log("compress the wiggle file using gzip...")
 124         os.system("gzip "+f)
 125
 126
 127 def zbdg_write (trackI, subdir, fileprefix, d, log=None, single=False):
 128     """Write shifted tags information in wiggle file in a given
 129     step. Then compress it using 'gzip' program.
 130
 131     trackI: shifted tags from PeakDetect object
 132     subdir: directory where to put the wiggle file
 133     fileprefix: wiggle file prefix
 134     d     : d length
 135     log   : logging function, default is sys.stderr.write
 136     space : space to write tag number on spots, default 10
 137     """
 138     if not log:
 139         log = lambda x: sys.stderr.write(x+"\n")
 140     chrs = trackI.get_chr_names()
 141     os.makedirs (subdir)
 142     step = 10000000 + 2*d
 143
 144     if single:
 145         log("write to a bedGraph file")
 146         f = os.path.join(subdir,fileprefix+"_all"+".bdg")
 147         bdgfhd = open(f,"w")
 148         bdgfhd.write("track type=bedGraph name=\"%s_all\" description=\"Extended tag pileup from MACS version %s\"\n" % (fileprefix.replace('_afterfiting',''), MACS_VERSION)) # data type line
 149
 150     for chrom in chrs:
 151         if not single:
 152             f = os.path.join(subdir,fileprefix+"_"+chrom+".bdg")
 153             log("write to "+f+" for chromosome "+chrom)
 154             bdgfhd = open(f,"w")
 155             bdgfhd.write("track type=bedGraph name=\"%s_%s\" description=\"Extended tag pileup from MACS version %s\"\n" % (fileprefix.replace('_afterfiting',''), chrom, MACS_VERSION)) # data type line
 156         else:
 157             log("write data for chromosome "+chrom)
 158
 159         tags = trackI.get_locations_by_chr(chrom)[0]
 160         l = len(tags)
 161         window_counts = array(BYTE4,[0]*step)
 162         startp = -1*d
 163         endp   = startp+step
 164         index_tag = 0
 165
 166         while index_tag<l:
 167             s = tags[index_tag]-d/2     # start of tag
 168             e = s+d                     # end of tag
 169
 170             if e < endp:
 171                 # project tag to window_counts line
 172                 ps = s-startp # projection start
 173                 pe = ps+d     # projection end
 174                 for i in range(ps,pe):
 175                     window_counts[i] += 1
 176                 index_tag += 1
 177             else:
 178                 # write it to zbdg file then reset parameters
 179                 # keep this tag for next window
 180                 prev = window_counts[d]
 181                 left = startp+d
 182                 right = left+1
 183                 for i in range(d+1,step-d):
 184                     if window_counts[i] == prev:
 185                         # same value, extend
 186                         right += 1
 187                     else:
 188                         # diff value, close
 189                         if prev != 0:
 190                             bdgfhd.write("%s\t%d\t%d\t%d\n" % (chrom,left,right,prev))
 191                         prev = window_counts[i]
 192                         left = right
 193                         right = left + 1
 194                 # last bin
 195                 if prev != 0:
 196                     bdgfhd.write("%s\t%d\t%d\t%d\n" % (chrom,left,right,prev))
 197
 198                 # reset
 199                 window_counts_next = array(BYTE4,[0]*step)
 200                 # copy d values from the tail of previous window to next window
 201                 for n,i in enumerate(range(step-2*d,step)): # debug
 202                     window_counts_next[n] = window_counts[i]
 203                 window_counts = window_counts_next
 204                 startp = endp - 2*d
 205                 endp = startp+step
 206         # last window
 207         prev = window_counts[d]
 208         left = startp+d
 209         right = left+1
 210         for i in range(d+1,step-d):
 211             if window_counts[i] == prev:
 212                 # same value, exrend
 213                 right += 1
 214             else:
 215                 # diff value, close
 216                 if prev != 0:
 217                     bdgfhd.write("%s\t%d\t%d\t%d\n" % (chrom,left,right,prev))
 218                 prev = window_counts[i]
 219                 left = right
 220                 right = left + 1
 221         # last bin
 222         if prev != 0:
 223             bdgfhd.write("%s\t%d\t%d\t%d\n" % (chrom,left,right,prev))
 224
 225         if not single:
 226             bdgfhd.close()
 227             log("compress the bedGraph file using gzip...")
 228             os.system("gzip "+f)
 229     if single:
 230         bdgfhd.close()
 231         log("compress the bedGraph file using gzip...")
 232         os.system("gzip "+f)
 233
 234
 235 def model2r_script(model,filename,name):
 236     rfhd = open(filename,"w")
 237     p = model.plus_line
 238     m = model.minus_line
 239     ycorr = model.ycorr
 240     xcorr = model.xcorr
 241     alt_d = model.alternative_d
 242     #s = model.shifted_line
 243     d = model.d
 244     w = len(p)
 245     norm_p = [0]*w
 246     norm_m = [0]*w
 247     #norm_s = [0]*w
 248     sum_p = sum(p)
 249     sum_m = sum(m)
 250     #sum_s = sum(s)
 251     for i in range(w):
 252         norm_p[i] = float(p[i])*100/sum_p
 253         norm_m[i] = float(m[i])*100/sum_m
 254         #norm_s[i] = float(s[i])*100/sum_s
 255     rfhd.write("# R script for Peak Model\n")
 256     rfhd.write("#  -- generated by MACS\n")
 257
 258     rfhd.write("""p <- c(%s)
 259 m <- c(%s)
 260 ycorr <- c(%s)
 261 xcorr <- c(%s)
 262 altd  <- c(%s)
 263 x <- seq.int((length(p)-1)/2*-1,(length(p)-1)/2)
 264 pdf('%s_model.pdf',height=6,width=6)
 265 plot(x,p,type='l',col=c('red'),main='Peak Model',xlab='Distance to the middle',ylab='Percentage')
 266 lines(x,m,col=c('blue'))
 267 legend('topleft',c('forward tags','reverse tags'),lty=c(1,1,1),col=c('red','blue'))
 268 plot(xcorr,ycorr,type='l',col=c('black'),main='Cross-Correlation',xlab='Lag between + and - tags',ylab='Correlation')
 269 abline(v=altd,lty=2,col=c('red'))
 270 legend('topleft','alternative lag(s)',lty=2,col='red')
 271 legend('right','alt lag(s) : %s',bty='n')
 272 dev.off()
 273 """ % (','.join(map(str,norm_p)),
 274        ','.join(map(str,norm_m)),
 275        ','.join(map(str,ycorr)),
 276        ','.join(map(str,xcorr)),
 277        ', '.join(map(str,alt_d)),
 278        name,
 279        ','.join(map(str,alt_d))
 280        ))
 281     rfhd.close()
 282
 283 def diag_write (filename, diag_result):
 284     ofhd_diag = open(filename,"w")
 285     a = diag_result[0]
 286     l = len(a)-2
 287     s = [90-x*10 for x in range(l)]
 288     ofhd_diag.write("FC range\t# of Peaks\tcover by sampling %s\n" % ("%\t".join (map(str,s))+"%"))
 289     format = "%s\t%d"+"\t%.2f"*l+"\n"
 290     ofhd_diag.write( "".join( [format % tuple(x) for x in diag_result])  )
 291     ofhd_diag.close()
 292
 293 # ------------------------------------
 294 # Classes
 295 # ------------------------------------