MACS3/Signal/Pileup.pyx

   1 # cython: language_level=3
   2 # cython: profile=True
   3 # Time-stamp: <2023-08-02 14:19:28 Tao Liu>
   4
   5 """Module Description: For pileup functions.
   6
   7 This code is free software; you can redistribute it and/or modify it
   8 under the terms of the BSD License (see the file LICENSE included with
   9 the distribution).
  10 """
  11
  12 # ------------------------------------
  13 # python modules
  14 # ------------------------------------
  15 from time import time as ttime
  16
  17 # ------------------------------------
  18 # MACS3 modules
  19 # ------------------------------------
  20 from MACS3.Utilities.Constants import *
  21 from MACS3.Signal.cPosValCalculation cimport single_end_pileup as c_single_end_pileup
  22 from MACS3.Signal.cPosValCalculation cimport write_pv_array_to_bedGraph as c_write_pv_array_to_bedGraph
  23 from MACS3.Signal.cPosValCalculation cimport PosVal
  24 from MACS3.Signal.cPosValCalculation cimport quick_pileup as c_quick_pileup
  25
  26 # ------------------------------------
  27 # Other modules
  28 # ------------------------------------
  29 import numpy as np
  30 cimport numpy as np
  31 from numpy cimport int32_t, float32_t
  32 from cpython cimport bool
  33 from cpython cimport PyObject
  34
  35 # ------------------------------------
  36 # C lib
  37 # ------------------------------------
  38 from libc.stdlib cimport malloc, free, qsort
  39
  40 # ------------------------------------
  41 # utility internal functions
  42 # ------------------------------------
  43 cdef inline float mean( float a, float b ):
  44     return ( a + b ) / 2
  45
  46 cdef void clean_up_ndarray ( np.ndarray x ):
  47     """ Clean up numpy array in two steps
  48     """
  49     cdef:
  50         long i
  51     i = x.shape[0] // 2
  52     x.resize( 100000 if i > 100000 else i, refcheck=False)
  53     x.resize( 0, refcheck=False)
  54     return
  55
  56 cdef np.ndarray[np.int32_t, ndim=1] fix_coordinates(np.ndarray[np.int32_t, ndim=1] poss, int rlength):
  57     """Fix the coordinates.
  58     """
  59     cdef:
  60         long i
  61         int32_t * ptr
  62
  63     ptr = <int32_t *> poss.data
  64
  65     # fix those negative coordinates
  66     for i in range( poss.shape[0] ):
  67         if ptr[i] < 0:
  68             ptr[i] = 0
  69         else:
  70             break
  71
  72     # fix those over-boundary coordinates
  73     for i in range( poss.shape[0]-1, -1, -1 ):
  74         if ptr[i] > rlength:
  75             ptr[i] = rlength
  76         else:
  77             break
  78     return poss
  79
  80 # ------------------------------------
  81 # functions
  82 # ------------------------------------
  83
  84 # ------------------------------------
  85 # functions for pileup_cmd only
  86 # ------------------------------------
  87
  88 # This function uses pure C code for pileup
  89 cpdef void pileup_and_write_se( trackI,
  90                         bytes output_filename,
  91                         int d,
  92                         float scale_factor,
  93                         float baseline_value = 0.0,
  94                         bint directional = True,
  95                         bint halfextension = True ):
  96     """ Pileup a FWTrackI object and write the pileup result into a
  97     bedGraph file.
  98
  99     This function calls pure C functions from cPosValCalculation.
 100
 101     This function is currently only used `macs3 pileup` cmd.
 102     """
 103     cdef:
 104         long five_shift, three_shift, l, i
 105         list chroms
 106         int n_chroms
 107         bytes chrom
 108         np.ndarray[np.int32_t, ndim=1] plus_tags, minus_tags
 109         dict chrlengths = trackI.get_rlengths ()
 110         int * plus_tags_pos
 111         int * minus_tags_pos
 112         long rlength
 113         long fl
 114         bytes py_bytes
 115         char * chrom_char
 116         PosVal * _data
 117         long l_data
 118
 119     # This block should be reused to determine the actual shift values
 120     if directional:
 121         # only extend to 3' side
 122         if halfextension:
 123             five_shift = d//-4  # five shift is used to move cursor towards 5' direction to find the start of fragment
 124             three_shift = d*3//4 # three shift is used to move cursor towards 3' direction to find the end of fragment
 125         else:
 126             five_shift = 0
 127             three_shift = d
 128     else:
 129         # both sides
 130         if halfextension:
 131             five_shift = d//4
 132             three_shift = five_shift
 133         else:
 134             five_shift = d//2
 135             three_shift = d - five_shift
 136     # end of the block
 137
 138     chroms = list(chrlengths.keys())
 139     n_chroms = len( chroms )
 140
 141     fh = open(output_filename, "w")
 142     fh.write("")
 143     fh.close()
 144
 145     for i in range( n_chroms ):
 146         chrom = chroms[ i ]
 147         (plus_tags, minus_tags) = trackI.get_locations_by_chr(chrom)
 148         rlength = <long> chrlengths[ chrom ]
 149         plus_tags_pos = <int *> plus_tags.data
 150         minus_tags_pos = <int *> minus_tags.data
 151
 152         _data = c_single_end_pileup( plus_tags_pos, plus_tags.shape[0], minus_tags_pos, minus_tags.shape[0], five_shift, three_shift, 0, rlength, scale_factor, baseline_value, &l_data )
 153
 154         # write
 155         py_bytes = chrom
 156         chrom_char = py_bytes
 157         c_write_pv_array_to_bedGraph( _data, l_data, chrom_char, output_filename, 1 )
 158
 159         # clean
 160         free( _data )
 161     return
 162
 163 # function to pileup BAMPE/BEDPE stored in PETrackI object and write to a BEDGraph file
 164 # this function uses c function
 165 cpdef pileup_and_write_pe( petrackI,
 166                            bytes output_filename,
 167                            float scale_factor = 1,
 168                            float baseline_value = 0.0):
 169     """ Pileup a PETrackI object and write the pileup result into a
 170     bedGraph file.
 171
 172     This function calls pure C functions from cPosValCalculation.
 173
 174     This function is currently only used `macs3 pileup` cmd.
 175     """
 176     cdef:
 177         dict chrlengths = petrackI.get_rlengths ()
 178         list chroms
 179         int n_chroms
 180         int i
 181         bytes chrom
 182
 183         np.ndarray locs
 184         np.ndarray[np.int32_t, ndim=1] locs0
 185         np.ndarray[np.int32_t, ndim=1] locs1
 186
 187         int * start_pos
 188         int * end_pos
 189         long fl
 190         bytes py_bytes
 191         char * chrom_char
 192         PosVal * _data
 193         long l_data
 194
 195     chroms = list(chrlengths.keys())
 196     n_chroms = len( chroms )
 197
 198     fh = open(output_filename, "w")
 199     fh.write("")
 200     fh.close()
 201
 202     for i in range( n_chroms ):
 203         chrom = chroms[ i ]
 204         locs = petrackI.get_locations_by_chr(chrom)
 205
 206         locs0 = np.sort(locs['l'])
 207         locs1 = np.sort(locs['r'])
 208         start_pos = <int *> locs0.data
 209         end_pos   = <int *> locs1.data
 210
 211         _data = c_quick_pileup ( start_pos, end_pos, locs0.shape[0], scale_factor, baseline_value, &l_data )
 212
 213         # write
 214         py_bytes = chrom
 215         chrom_char = py_bytes
 216         c_write_pv_array_to_bedGraph( _data, l_data, chrom_char, output_filename, 1 )
 217
 218         # clean
 219         free( _data )
 220     return
 221
 222 # ------------------------------------
 223 # functions for other codes
 224 # ------------------------------------
 225
 226 # general pileup function implemented in cython
 227 cpdef list se_all_in_one_pileup ( np.ndarray[np.int32_t, ndim=1] plus_tags, np.ndarray[np.int32_t, ndim=1] minus_tags, long five_shift, long three_shift, int rlength, float scale_factor, float baseline_value ):
 228     """Return pileup given 5' end of fragment at plus or minus strand
 229     separately, and given shift at both direction to recover a
 230     fragment. This function is for single end sequencing library
 231     only. Please directly use 'quick_pileup' function for Pair-end
 232     library.
 233
 234     It contains a super-fast and simple algorithm proposed by Jie
 235     Wang. It will take sorted start positions and end positions, then
 236     compute pileup values.
 237
 238     It will return a pileup result in similar structure as
 239     bedGraph. There are two python arrays:
 240
 241     [end positions, values] or '[p,v] array' in other description for
 242     functions within MACS3.
 243
 244     Two arrays have the same length and can be matched by index. End
 245     position at index x (p[x]) record continuous value of v[x] from
 246     p[x-1] to p[x].
 247
 248     """
 249     cdef:
 250         long i_s, i_e, i, I
 251         int a, b, p, pre_p, pileup
 252         list tmp
 253         long lp = plus_tags.shape[0]
 254         long lm = minus_tags.shape[0]
 255         long l = lp + lm
 256         np.ndarray[np.int32_t, ndim=1] start_poss, end_poss, ret_p
 257         np.ndarray[np.float32_t, ndim=1] ret_v
 258         # pointers are used for numpy arrays
 259         int32_t * start_poss_ptr
 260         int32_t * end_poss_ptr
 261         int32_t * ret_p_ptr     # pointer for position array
 262         float32_t * ret_v_ptr     # pointer for value array
 263
 264
 265     start_poss = np.concatenate( ( plus_tags-five_shift, minus_tags-three_shift ) )
 266     end_poss   = np.concatenate( ( plus_tags+three_shift, minus_tags+five_shift ) )
 267
 268     # sort
 269     start_poss.sort()
 270     end_poss.sort()
 271
 272     # fix negative coordinations and those extends over end of chromosomes
 273     start_poss = fix_coordinates(start_poss, rlength)
 274     end_poss = fix_coordinates(end_poss, rlength)
 275
 276     lx = start_poss.shape[0]
 277
 278     start_poss_ptr = <int32_t *> start_poss.data
 279     end_poss_ptr = <int32_t *> end_poss.data
 280
 281     ret_p = np.zeros( 2 * lx, dtype="int32" )
 282     ret_v = np.zeros( 2 * lx, dtype="float32" )
 283
 284     ret_p_ptr = <int32_t *> ret_p.data
 285     ret_v_ptr = <float32_t *> ret_v.data
 286
 287     tmp = [ret_p, ret_v]        # for (endpos,value)
 288     i_s = 0                     # index of start_poss
 289     i_e = 0                     # index of end_poss
 290     I = 0
 291
 292     pileup = 0
 293     if start_poss.shape[0] == 0: return tmp
 294     pre_p = min(start_poss_ptr[0],end_poss_ptr[0])
 295
 296     if pre_p != 0:
 297         # the first chunk of 0
 298         ret_p_ptr[0] = pre_p
 299         ret_v_ptr[0] = max(0,baseline_value)
 300         ret_p_ptr += 1
 301         ret_v_ptr += 1
 302         I += 1
 303
 304     pre_v = pileup
 305
 306     assert start_poss.shape[0] == end_poss.shape[0]
 307     lx = start_poss.shape[0]
 308
 309     while i_s < lx and i_e < lx:
 310         if start_poss_ptr[0] < end_poss_ptr[0]:
 311             p = start_poss_ptr[0]
 312             if p != pre_p:
 313                 ret_p_ptr[0] = p
 314                 ret_v_ptr[0] = max(pileup * scale_factor, baseline_value)
 315                 ret_p_ptr += 1
 316                 ret_v_ptr += 1
 317                 I += 1
 318                 pre_p = p
 319             pileup += 1
 320             i_s += 1
 321             start_poss_ptr += 1
 322         elif start_poss_ptr[0] > end_poss_ptr[0]:
 323             p = end_poss_ptr[0]
 324             if p != pre_p:
 325                 ret_p_ptr[0] = p
 326                 ret_v_ptr[0] = max(pileup * scale_factor, baseline_value)
 327                 ret_p_ptr += 1
 328                 ret_v_ptr += 1
 329                 I += 1
 330                 pre_p = p
 331             pileup -= 1
 332             i_e += 1
 333             end_poss_ptr += 1
 334         else:
 335             i_s += 1
 336             i_e += 1
 337             start_poss_ptr += 1
 338             end_poss_ptr += 1
 339
 340     if i_e < lx:
 341         # add rest of end positions
 342         for i in range(i_e, lx):
 343             p = end_poss_ptr[0]
 344             if p != pre_p:
 345                 ret_p_ptr[0] = p
 346                 ret_v_ptr[0] = max(pileup * scale_factor, baseline_value)
 347                 ret_p_ptr += 1
 348                 ret_v_ptr += 1
 349                 I += 1
 350                 pre_p = p
 351             pileup -= 1
 352             end_poss_ptr += 1
 353
 354     # clean mem
 355     clean_up_ndarray( start_poss )
 356     clean_up_ndarray( end_poss )
 357
 358     # resize
 359     ret_p.resize( I, refcheck=False )
 360     ret_v.resize( I, refcheck=False )
 361
 362     return tmp
 363
 364 # quick pileup implemented in cython
 365 cpdef list quick_pileup ( np.ndarray[np.int32_t, ndim=1] start_poss, np.ndarray[np.int32_t, ndim=1] end_poss, float scale_factor, float baseline_value ):
 366     """Return pileup given plus strand and minus strand positions of fragments.
 367
 368     A super-fast and simple algorithm proposed by Jie Wang. It will
 369     take sorted start positions and end positions, then compute pileup
 370     values.
 371
 372     It will return a pileup result in similar structure as
 373     bedGraph. There are two python arrays:
 374
 375     [end positions, values] or [p,v]
 376
 377     Two arrays have the same length and can be matched by index. End
 378     position at index x (p[x]) record continuous value of v[x] from
 379     p[x-1] to p[x].
 380
 381     """
 382     cdef:
 383         long i_s, i_e, i, I
 384         int a, b, p, pre_p, pileup
 385         list tmp
 386         long ls = start_poss.shape[0]
 387         long le = end_poss.shape[0]
 388         long l = ls + le
 389         np.ndarray[np.int32_t, ndim=1] ret_p
 390         np.ndarray[np.float32_t, ndim=1] ret_v
 391         # pointers are used for numpy arrays
 392         int32_t * start_poss_ptr
 393         int32_t * end_poss_ptr
 394         int32_t * ret_p_ptr     # pointer for position array
 395         float32_t * ret_v_ptr     # pointer for value array
 396         #int max_pileup = 0
 397
 398     start_poss_ptr = <int32_t *> start_poss.data
 399     end_poss_ptr = <int32_t *> end_poss.data
 400
 401     ret_p = np.zeros( l, dtype="int32" )
 402     ret_v = np.zeros( l, dtype="float32" )
 403
 404     ret_p_ptr = <int32_t *> ret_p.data
 405     ret_v_ptr = <float32_t *> ret_v.data
 406
 407     tmp = [ret_p, ret_v] # for (endpos,value)
 408
 409     i_s = 0                         # index of plus_tags
 410     i_e = 0                         # index of minus_tags
 411     I = 0
 412
 413     pileup = 0
 414     if ls == 0: return tmp
 415     pre_p = min(start_poss_ptr[0], end_poss_ptr[0])
 416
 417     if pre_p != 0:
 418         # the first chunk of 0
 419         ret_p_ptr[0] = pre_p
 420         ret_v_ptr[0] = max(0,baseline_value)
 421         ret_p_ptr += 1
 422         ret_v_ptr += 1
 423         I += 1
 424
 425     pre_v = pileup
 426
 427     while i_s < ls and i_e < le:
 428         if start_poss_ptr[0] < end_poss_ptr[0]:
 429             p = start_poss_ptr[0]
 430             if p != pre_p:
 431                 ret_p_ptr[0] = p
 432                 ret_v_ptr[0] = max(pileup * scale_factor, baseline_value)
 433                 ret_p_ptr += 1
 434                 ret_v_ptr += 1
 435                 I += 1
 436                 pre_p = p
 437             pileup += 1
 438             #if pileup > max_pileup:
 439             #    max_pileup = pileup
 440             i_s += 1
 441             start_poss_ptr += 1
 442         elif start_poss_ptr[0] > end_poss_ptr[0]:
 443             p = end_poss_ptr[0]
 444             if p != pre_p:
 445                 ret_p_ptr[0] = p
 446                 ret_v_ptr[0] = max(pileup * scale_factor, baseline_value)
 447                 ret_p_ptr += 1
 448                 ret_v_ptr += 1
 449                 I += 1
 450                 pre_p = p
 451             pileup -= 1
 452             i_e += 1
 453             end_poss_ptr += 1
 454         else:
 455             i_s += 1
 456             i_e += 1
 457             start_poss_ptr += 1
 458             end_poss_ptr += 1
 459
 460     if i_e < le:
 461         # add rest of end positions
 462         for i in range(i_e, le):
 463             p = end_poss_ptr[0]
 464             #for p in minus_tags[i_e:]:
 465             if p != pre_p:
 466                 ret_p_ptr[0] = p
 467                 ret_v_ptr[0] = max(pileup * scale_factor, baseline_value)
 468                 ret_p_ptr += 1
 469                 ret_v_ptr += 1
 470                 I += 1
 471                 pre_p = p
 472             pileup -= 1
 473             end_poss_ptr += 1
 474
 475     ret_p.resize( I, refcheck=False )
 476     ret_v.resize( I, refcheck=False )
 477
 478     return tmp
 479
 480 # quick pileup implemented in cython
 481 cpdef list naive_quick_pileup ( np.ndarray[np.int32_t, ndim=1] sorted_poss, int extension):
 482     """Simple pileup, every tag will be extended left and right with length `extension`.
 483
 484     Note: Assumption is that `poss` has to be pre-sorted! There is no
 485     check on whether it's sorted.
 486     """
 487     cdef:
 488         long i_s, i_e, i, I
 489         int a, b, p, pre_p, pileup
 490         long l = sorted_poss.shape[0]
 491         np.ndarray[np.int32_t, ndim=1] start_poss
 492         np.ndarray[np.int32_t, ndim=1] end_poss
 493         np.ndarray[np.int32_t, ndim=1] ret_p
 494         np.ndarray[np.float32_t, ndim=1] ret_v
 495         # pointers are used for numpy arrays
 496         int32_t * start_poss_ptr
 497         int32_t * end_poss_ptr
 498         int32_t * ret_p_ptr     # pointer for position array
 499         float32_t * ret_v_ptr     # pointer for value array
 500
 501     start_poss = sorted_poss - extension
 502     start_poss[ start_poss < 0 ] = 0
 503     end_poss = sorted_poss + extension
 504
 505     start_poss_ptr = <int32_t *> start_poss.data
 506     end_poss_ptr = <int32_t *> end_poss.data
 507
 508
 509     ret_p = np.zeros( 2*l, dtype="int32" )
 510     ret_v = np.zeros( 2*l, dtype="float32" )
 511
 512     ret_p_ptr = <int32_t *> ret_p.data
 513     ret_v_ptr = <float32_t *> ret_v.data
 514
 515     i_s = 0                         # index of plus_tags
 516     i_e = 0                         # index of minus_tags
 517     I = 0
 518
 519     pileup = 0
 520     if l == 0: return ret
 521     pre_p = min(start_poss_ptr[0], end_poss_ptr[0])
 522
 523     if pre_p != 0:
 524         # the first chunk of 0
 525         ret_p_ptr[0] = pre_p
 526         ret_v_ptr[0] = 0
 527         ret_p_ptr += 1
 528         ret_v_ptr += 1
 529         I += 1
 530
 531     pre_v = pileup
 532
 533     while i_s < l and i_e < l:
 534         if start_poss_ptr[0] < end_poss_ptr[0]:
 535             p = start_poss_ptr[0]
 536             if p != pre_p:
 537                 ret_p_ptr[0] = p
 538                 ret_v_ptr[0] = pileup
 539                 ret_p_ptr += 1
 540                 ret_v_ptr += 1
 541                 I += 1
 542                 pre_p = p
 543             pileup += 1
 544             i_s += 1
 545             start_poss_ptr += 1
 546         elif start_poss_ptr[0] > end_poss_ptr[0]:
 547             p = end_poss_ptr[0]
 548             if p != pre_p:
 549                 ret_p_ptr[0] = p
 550                 ret_v_ptr[0] = pileup
 551                 ret_p_ptr += 1
 552                 ret_v_ptr += 1
 553                 I += 1
 554                 pre_p = p
 555             pileup -= 1
 556             i_e += 1
 557             end_poss_ptr += 1
 558         else:
 559             i_s += 1
 560             i_e += 1
 561             start_poss_ptr += 1
 562             end_poss_ptr += 1
 563
 564     # add rest of end positions
 565     if i_e < l:
 566         for i in range(i_e, l):
 567             p = end_poss_ptr[0]
 568             if p != pre_p:
 569                 ret_p_ptr[0] = p
 570                 ret_v_ptr[0] = pileup
 571                 ret_p_ptr += 1
 572                 ret_v_ptr += 1
 573                 I += 1
 574                 pre_p = p
 575             pileup -= 1
 576             end_poss_ptr += 1
 577
 578     ret_p.resize( I, refcheck=False )
 579     ret_v.resize( I, refcheck=False )
 580
 581     return [ ret_p, ret_v ]
 582
 583 # general function to compare two pv arrays in cython.
 584 cpdef list over_two_pv_array ( list pv_array1, list pv_array2, func="max" ):
 585     """Merge two position-value arrays. For intersection regions, take
 586     the maximum value within region.
 587
 588     pv_array1 and pv_array2 are [p,v] type lists, same as the output
 589     from quick_pileup function. 'p' and 'v' are numpy arrays of int32
 590     and float32.
 591
 592     available operations are 'max', 'min', and 'mean'
 593     """
 594
 595     cdef:
 596         int pre_p, p1, p2
 597         float v1, v2
 598         np.ndarray[np.int32_t, ndim=1] a1_pos, a2_pos, ret_pos
 599         np.ndarray[np.float32_t, ndim=1] a1_v, a2_v, ret_v
 600         int32_t * a1_pos_ptr
 601         int32_t * a2_pos_ptr
 602         int32_t * ret_pos_ptr
 603         float32_t * a1_v_ptr
 604         float32_t * a2_v_ptr
 605         float32_t * ret_v_ptr
 606         long l1, l2, l, i1, i2, I
 607
 608     if func == "max":
 609         f = max
 610     elif func == "min":
 611         f = min
 612     elif func == "mean":
 613         f = mean
 614     else:
 615         raise Exception("Invalid function")
 616
 617     [ a1_pos, a1_v ] = pv_array1
 618     [ a2_pos, a2_v ] = pv_array2
 619     ret_pos = np.zeros( a1_pos.shape[0] + a2_pos.shape[0], dtype="int32" )
 620     ret_v   = np.zeros( a1_pos.shape[0] + a2_pos.shape[0], dtype="float32" )
 621
 622     a1_pos_ptr = <int32_t *> a1_pos.data
 623     a1_v_ptr = <float32_t *> a1_v.data
 624     a2_pos_ptr = <int32_t *> a2_pos.data
 625     a2_v_ptr = <float32_t *> a2_v.data
 626     ret_pos_ptr = <int32_t *> ret_pos.data
 627     ret_v_ptr = <float32_t *> ret_v.data
 628
 629     l1 = a1_pos.shape[0]
 630     l2 = a2_pos.shape[0]
 631
 632     i1 = 0
 633     i2 = 0
 634     I = 0
 635
 636     pre_p = 0                   # remember the previous position in the new bedGraphTrackI object ret
 637
 638     while i1 < l1 and i2 < l2:
 639         ret_v_ptr[0] =  f( a1_v_ptr[0], a2_v_ptr[0] )
 640         I += 1
 641         if a1_pos_ptr[0] < a2_pos_ptr[0]:
 642             # clip a region from pre_p to p1, then set pre_p as p1.
 643             ret_pos_ptr[0] = a1_pos_ptr[0]
 644             ret_pos_ptr += 1
 645             ret_v_ptr += 1
 646             pre_p = a1_pos_ptr[0]
 647             # call for the next p1 and v1
 648             a1_pos_ptr += 1
 649             a1_v_ptr += 1
 650             i1 += 1
 651         elif a1_pos_ptr[0] > a2_pos_ptr[0]:
 652             # clip a region from pre_p to p2, then set pre_p as p2.
 653             ret_pos_ptr[0] = a2_pos_ptr[0]
 654             ret_pos_ptr += 1
 655             ret_v_ptr += 1
 656             pre_p = a2_pos_ptr[0]
 657             # call for the next p1 and v1
 658             a2_pos_ptr += 1
 659             a2_v_ptr += 1
 660             i2 += 1
 661         else:
 662             # from pre_p to p1 or p2, then set pre_p as p1 or p2.
 663             ret_pos_ptr[0] = a1_pos_ptr[0]
 664             ret_pos_ptr += 1
 665             ret_v_ptr += 1
 666             pre_p = a1_pos_ptr[0]
 667             # call for the next p1, v1, p2, v2.
 668             a1_pos_ptr += 1
 669             a1_v_ptr += 1
 670             i1 += 1
 671             a2_pos_ptr += 1
 672             a2_v_ptr += 1
 673             i2 += 1
 674
 675     ret_pos.resize( I, refcheck=False )
 676     ret_v.resize( I, refcheck=False )
 677     return [ret_pos, ret_v]
 678
 679 cpdef naive_call_peaks ( list pv_array, float min_v, float max_v = 1e30, int max_gap = 50, int min_length = 200 ):
 680     cdef:
 681         int peak_length, pre_p, p, i, summit, tstart, tend
 682         long x                  # index used for searching the first peak
 683         double v, summit_value, tvalue
 684         bytes chrom
 685         set chrs
 686         list peak_content       # (pre_p, p, v) for each region in the peak
 687         list ret_peaks = []     # returned peak summit and height
 688
 689     peak_content = []
 690     ( ps,vs ) = pv_array
 691     psn = iter(ps).__next__         # assign the next function to a viable to speed up
 692     vsn = iter(vs).__next__
 693     x = 0
 694     pre_p = 0                   # remember previous position
 695     while True:
 696         # find the first region above min_v
 697         try:                    # try to read the first data range for this chrom
 698             p = psn()
 699             v = vsn()
 700         except:
 701             break
 702         x += 1                  # index for the next point
 703         if v > min_v:
 704             peak_content = [ ( pre_p , p, v ), ]
 705             pre_p = p
 706             break               # found the first range above min_v
 707         else:
 708             pre_p = p
 709
 710     for i in range( x, len( ps ) ):
 711         # continue scan the rest regions
 712         p = psn()
 713         v = vsn()
 714         if v <= min_v: # not be detected as 'peak'
 715             pre_p = p
 716             continue
 717         # for points above min_v
 718         # if the gap is allowed
 719         # gap = pre_p - peak_content[-1][1] or the dist between pre_p and the last p
 720         if pre_p - peak_content[ -1 ][ 1 ] <= max_gap:
 721             peak_content.append( ( pre_p, p, v ) )
 722         else:
 723             # when the gap is not allowed, close this peak IF length is larger than min_length
 724             if peak_content[ -1 ][ 1 ] - peak_content[ 0 ][ 0 ] >= min_length:
 725                 __close_peak( peak_content, ret_peaks, max_v, min_length )
 726             # reset and start a new peak
 727             peak_content = [ ( pre_p, p, v ), ]
 728         pre_p = p
 729
 730     # save the last peak
 731     if peak_content:
 732         if peak_content[ -1 ][ 1 ] - peak_content[ 0 ][ 0 ] >= min_length:
 733             __close_peak( peak_content, ret_peaks, max_v, min_length )
 734     return ret_peaks
 735
 736 cdef void __close_peak( peak_content, peaks, float max_v, int min_length ):
 737     """Internal function to find the summit and height
 738
 739     If the height is larger than max_v, skip
 740     """
 741     tsummit = []
 742     summit = 0
 743     summit_value = 0
 744     for (tstart,tend,tvalue) in peak_content:
 745         if not summit_value or summit_value < tvalue:
 746             tsummit = [int((tend+tstart)/2),]
 747             summit_value = tvalue
 748         elif summit_value == tvalue:
 749             tsummit.append( int((tend+tstart)/2) )
 750     summit = tsummit[int((len(tsummit)+1)/2)-1 ]
 751     if summit_value < max_v:
 752         peaks.append( (summit, summit_value) )
 753     return