1 /* Time-stamp: <2020-11-23 14:52:31 Tao Liu>
3 This code is free software; you can redistribute it and/or modify it
4 under the terms of the BSD License (see the file LICENSE included
5 with the distribution).
11 #include "cPosValCalculation.h"
13 /* Simple compare function */
14 int cmpfunc_simple (const void * a
, const void * b
)
16 return ( *(int*)a
- *(int*)b
);
23 1. int * poss: must be sorted!
28 int * fix_coordinates ( int * poss
, long l
, int leftmost_coord
, int rightmost_coord
)
31 for ( i
= 0; i
< l
; i
++ )
33 if ( poss
[ i
] < leftmost_coord
)
34 poss
[ i
] = leftmost_coord
;
39 for ( i
= l
-1; i
> -1; i
-- )
41 if ( poss
[ i
] > rightmost_coord
)
42 poss
[ i
] = rightmost_coord
;
49 /* Pileup function for single end data.
51 1. int * plus_tags: the 5' ends of tags aligned to plus strand. Start from 0.
52 2. long l_plus_tags: number of plus tags.
53 3. int * minus_tags: the 3' ends of tags aligned to minus strand. Start from 0.
54 4. long l_minus_tags: number of minus tags.
55 5. int five_shift: shift value at 5' end to recover the DNA fragment
56 6. int three_shift: shift value at 3' end to recover the DNA fragment
57 7. int leftmost_coord: any coords smaller than it will be set to it
58 8. int rightmost_coord: any coords larger than it will be set to it
59 9. float scale_factor: scale factor on pileup
60 10. float baseline_value: the minimum value of pileup
63 1. PosVal *: position-value pairs in bedGraph fashion.
64 2. long * final_length: the final length for the returned array of PosVal.
68 pileup = single_end_pileup ( {1,2,3}, 3, {2,3,4}, 3, 0, 3, 0, 20, 0.5, 1.0, &final_length );
69 for ( i = 0; i < final_length; i++ )
71 printf( "pos:%d value:%.2f\n", pileup[i].pos, pileup[i].value );
75 struct PosVal
* single_end_pileup( int * plus_tags
, long l_plus_tags
, int * minus_tags
, long l_minus_tags
, int five_shift
, int three_shift
, int leftmost_coord
, int rightmost_coord
, float scale_factor
, float baseline_value
, long * final_length
)
79 long l
= l_plus_tags
+ l_minus_tags
;
80 int * start_poss
, * end_poss
, * ptr_start_poss
, * ptr_end_poss
;
81 struct PosVal
* pos_value_array
;
83 start_poss
= (int *) malloc( l
* sizeof( int ) );
84 end_poss
= (int *) malloc( l
* sizeof( int ) );
86 ptr_start_poss
= start_poss
;
87 ptr_end_poss
= end_poss
;
89 for ( i
= 0; i
< l_plus_tags
; i
++ )
91 *ptr_start_poss
= plus_tags
[ i
] - five_shift
; ptr_start_poss
++;
92 *ptr_end_poss
= plus_tags
[ i
] + three_shift
; ptr_end_poss
++;
95 for ( i
= 0; i
< l_minus_tags
; i
++ )
97 *ptr_start_poss
= minus_tags
[ i
] - three_shift
; ptr_start_poss
++;
98 *ptr_end_poss
= minus_tags
[ i
] + five_shift
; ptr_end_poss
++;
101 qsort( start_poss
, l
, sizeof( int ), cmpfunc_simple
);
102 qsort( end_poss
, l
, sizeof( int ), cmpfunc_simple
);
104 // fix negative coordinations and those extends over end of chromosomes
105 start_poss
= fix_coordinates ( start_poss
, l
, leftmost_coord
, rightmost_coord
);
106 end_poss
= fix_coordinates ( end_poss
, l
, leftmost_coord
, rightmost_coord
);
108 pos_value_array
= quick_pileup ( start_poss
, end_poss
, l
, scale_factor
, baseline_value
, final_length
);
113 return pos_value_array
;
116 /* Assume start_poss and end_poss have been sorted and the coordinates have been fixed. */
117 struct PosVal
* quick_pileup ( int * start_poss
, int * end_poss
, long length_poss
, float scale_factor
, float baseline_value
, long * final_length
)
120 int p
, pre_p
, pileup
;
121 struct PosVal
* pos_value_array
, * ptr_pos_value_array
;
122 int * ptr_start_poss
, * ptr_end_poss
;
123 long l
= length_poss
;
125 pos_value_array
= (struct PosVal
*) malloc ( 2 * l
* sizeof( struct PosVal
) );
129 ptr_pos_value_array
= pos_value_array
; ptr_start_poss
= start_poss
; ptr_end_poss
= end_poss
;
132 pre_p
= min( *start_poss
, *end_poss
);
133 ptr_start_poss
++; ptr_end_poss
++;
138 (*ptr_pos_value_array
).pos
= pre_p
;
139 (*ptr_pos_value_array
).value
= max( 0, baseline_value
);
140 ptr_pos_value_array
++; I
++;
143 ptr_start_poss
= start_poss
;
144 ptr_end_poss
= end_poss
;
146 while (i_s
< l
&& i_e
< l
)
148 if ( *ptr_start_poss
< *ptr_end_poss
)
153 (*ptr_pos_value_array
).pos
= p
;
154 (*ptr_pos_value_array
).value
= max( pileup
* scale_factor
, baseline_value
);
155 ptr_pos_value_array
++; I
++;
162 else if ( *ptr_start_poss
> *ptr_end_poss
)
167 (*ptr_pos_value_array
).pos
= p
;
168 (*ptr_pos_value_array
).value
= max( pileup
* scale_factor
, baseline_value
);
169 ptr_pos_value_array
++; I
++;
185 // add the rest of end positions.
188 for ( i
= i_e
; i
< l
; i
++ )
193 (*ptr_pos_value_array
).pos
= p
;
194 (*ptr_pos_value_array
).value
= max( pileup
* scale_factor
, baseline_value
);
195 ptr_pos_value_array
++; I
++;
202 pos_value_array
= (struct PosVal
*) realloc ( pos_value_array
, I
* sizeof( struct PosVal
) );
203 *final_length
= I
; /* return the final length of pos_value_array */
204 return pos_value_array
;
207 long quick_pileup_simple ( int * ret_poss
, float * ret_values
, int * start_poss
, int * end_poss
, long length_poss
, float scale_factor
, float baseline_value
)
210 int p
, pre_p
, pileup
;
212 int * ptr_start_poss
, * ptr_end_poss
;
213 float * ptr_ret_values
;
214 long l
= length_poss
;
216 ptr_ret_poss
= ret_poss
; ptr_ret_values
= ret_values
;
217 ptr_start_poss
= start_poss
; ptr_end_poss
= end_poss
;
222 pre_p
= min( *ptr_start_poss
, *ptr_end_poss
);
223 ptr_start_poss
++; ptr_end_poss
++;
228 *ptr_ret_poss
= pre_p
;
229 *ptr_ret_values
= max( 0, baseline_value
);
230 ptr_ret_poss
++; ptr_ret_values
++; I
++;
233 while (i_s
< l
&& i_e
< l
)
235 if ( *ptr_start_poss
< *ptr_end_poss
)
241 *ptr_ret_values
= max( pileup
* scale_factor
, baseline_value
);
242 ptr_ret_poss
++;ptr_ret_values
++; I
++;
249 else if ( *ptr_start_poss
> *ptr_end_poss
)
255 *ptr_ret_values
= max( pileup
* scale_factor
, baseline_value
);
256 ptr_ret_poss
++;ptr_ret_values
++; I
++;
272 // add the rest of end positions.
275 for ( i
= i_e
; i
< l
; i
++ )
281 *ptr_ret_values
= max( pileup
* scale_factor
, baseline_value
);
282 ptr_ret_poss
++;ptr_ret_values
++; I
++;
292 /* Calculate the maximum value between two sets of PosVal arrays (like bedGraph type of data) */
293 struct PosVal
* max_over_two_pv_array ( struct PosVal
* pva1
, long l_pva1
, struct PosVal
* pva2
, long l_pva2
, long * final_length
)
295 struct PosVal
* ptr_pva1
, * ptr_pva2
;
296 struct PosVal
* ret_pva
, * ptr_ret_pva
;
299 ptr_pva1
= pva1
; ptr_pva2
= pva2
;
300 ret_pva
= ( struct PosVal
* ) malloc ( ( l_pva1
+ l_pva2
) * sizeof( struct PosVal
) );
301 ptr_ret_pva
= ret_pva
;
307 while ( i1
< l_pva1
&& i2
< l_pva2
)
309 if ( (*ptr_pva1
).pos
< (*ptr_pva2
).pos
)
311 (*ptr_ret_pva
).pos
= (*ptr_pva1
).pos
;
312 (*ptr_ret_pva
).value
= max( (*ptr_pva1
).value
, (*ptr_pva2
).value
);
316 else if ( (*ptr_pva1
).pos
> (*ptr_pva2
).pos
)
318 (*ptr_ret_pva
).pos
= (*ptr_pva2
).pos
;
319 (*ptr_ret_pva
).value
= max( (*ptr_pva1
).value
, (*ptr_pva2
).value
);
323 else // (*ptr_pva1).pos == (*ptr_pva2).pos
325 (*ptr_ret_pva
).pos
= (*ptr_pva1
).pos
;
326 (*ptr_ret_pva
).value
= max( (*ptr_pva1
).value
, (*ptr_pva2
).value
);
337 /* Calculate using specified function between two sets of PosVal arrays (like bedGraph type of data) */
338 struct PosVal
* apply_func_two_pv_array ( float (*func
)(float, float), struct PosVal
* pva1
, long l_pva1
, struct PosVal
* pva2
, long l_pva2
, long * final_length
)
340 struct PosVal
* ptr_pva1
, * ptr_pva2
;
341 struct PosVal
* ret_pva
, * ptr_ret_pva
;
344 ptr_pva1
= pva1
; ptr_pva2
= pva2
;
345 ret_pva
= ( struct PosVal
* ) malloc ( ( l_pva1
+ l_pva2
) * sizeof( struct PosVal
) );
346 ptr_ret_pva
= ret_pva
;
352 while ( i1
< l_pva1
&& i2
< l_pva2
)
354 if ( (*ptr_pva1
).pos
< (*ptr_pva2
).pos
)
356 (*ptr_ret_pva
).pos
= (*ptr_pva1
).pos
;
357 (*ptr_ret_pva
).value
= (*func
)( (*ptr_pva1
).value
, (*ptr_pva2
).value
);
361 else if ( (*ptr_pva1
).pos
> (*ptr_pva2
).pos
)
363 (*ptr_ret_pva
).pos
= (*ptr_pva2
).pos
;
364 (*ptr_ret_pva
).value
= (*func
)( (*ptr_pva1
).value
, (*ptr_pva2
).value
);
368 else // (*ptr_pva1).pos == (*ptr_pva2).pos
370 (*ptr_ret_pva
).pos
= (*ptr_pva1
).pos
;
371 (*ptr_ret_pva
).value
= (*func
)( (*ptr_pva1
).value
, (*ptr_pva2
).value
);
382 /* Align two PosVal arrays according to their overlaps */
383 struct PosValVal
* align_two_pv_array ( struct PosVal
* pva1
, long l_pva1
, struct PosVal
* pva2
, long l_pva2
, long * final_length
)
385 struct PosVal
* ptr_pva1
, * ptr_pva2
;
386 struct PosValVal
* ret_pvva
, * ptr_ret_pvva
;
389 ptr_pva1
= pva1
; ptr_pva2
= pva2
;
390 ret_pvva
= ( struct PosValVal
* ) malloc ( ( l_pva1
+ l_pva2
) * sizeof( struct PosValVal
) );
391 ptr_ret_pvva
= ret_pvva
;
397 while ( i1
< l_pva1
&& i2
< l_pva2
)
399 if ( (*ptr_pva1
).pos
< (*ptr_pva2
).pos
)
401 (*ptr_ret_pvva
).pos
= (*ptr_pva1
).pos
;
402 (*ptr_ret_pvva
).value1
= (*ptr_pva1
).value
;
403 (*ptr_ret_pvva
).value2
= (*ptr_pva2
).value
;
407 else if ( (*ptr_pva1
).pos
> (*ptr_pva2
).pos
)
409 (*ptr_ret_pvva
).pos
= (*ptr_pva2
).pos
;
410 (*ptr_ret_pvva
).value1
= (*ptr_pva1
).value
;
411 (*ptr_ret_pvva
).value2
= (*ptr_pva2
).value
;
415 else // (*ptr_pva1).pos == (*ptr_pva2).pos
417 (*ptr_ret_pvva
).pos
= (*ptr_pva1
).pos
;
418 (*ptr_ret_pvva
).value1
= (*ptr_pva1
).value
;
419 (*ptr_ret_pvva
).value2
= (*ptr_pva2
).value
;
430 /* Write pos-value array to a bedGraph file. If append is non-zero then just add content to the existing file. */
431 void write_pv_array_to_bedGraph ( struct PosVal
* pv_array
, long l_pv_array
, char * chromosome
, char * bdgfile
, short append
)
439 fp
= fopen ( bdgfile
, "a" );
441 fp
= fopen ( bdgfile
, "w" );
445 pre_e
= (*pv_array
).pos
;
446 pre_v
= (*pv_array
).value
;
450 for ( i
= 1; i
< l_pv_array
; i
++ )
452 if ( (*pv_array
).value
!= pre_v
)
454 fprintf ( fp
, "%s\t%d\t%d\t%.5f\n", chromosome
, pre_s
, pre_e
, pre_v
);
456 pre_e
= (*pv_array
).pos
;
457 pre_v
= (*pv_array
).value
;
461 pre_e
= (*pv_array
).pos
;
467 fprintf ( fp
, "%s\t%d\t%d\t%.5f\n", chromosome
, pre_s
, pre_e
, pre_v
);
478 int p_array1
[3] = {11,12,13};
479 int p_array2
[3] = {12,13,14};
480 int m_array1
[3] = {12,13,14};
481 int m_array2
[3] = {13,14,15};
482 int leftmost_coord
= 0;
483 int rightmost_coord
= 20;
484 float scale_factor
= 0.5;
485 long final_length1
= 0;
486 long final_length2
= 0;
487 long final_length_max
= 0;
488 struct PosVal
* pileup1
;
489 struct PosVal
* pileup2
;
490 struct PosVal
* max_pileup
;
492 pileup1
= single_end_pileup ( p_array1
, 3, m_array1
, 3, five_shift
, three_shift
, leftmost_coord
, rightmost_coord
, scale_factor
, 0, &final_length1
);
493 pileup2
= single_end_pileup ( p_array2
, 3, m_array2
, 3, five_shift
, three_shift
, leftmost_coord
, rightmost_coord
, scale_factor
, 0, &final_length2
);
495 printf( "pileup 1\n" );
496 for ( i
= 0; i
< final_length1
; i
++ )
498 printf( "pos:%d value:%.2f\n", pileup1
[i
].pos
, pileup1
[i
].value
);
500 printf( "pileup 2\n" );
501 for ( i
= 0; i
< final_length2
; i
++ )
503 printf( "pos:%d value:%.2f\n", pileup2
[i
].pos
, pileup2
[i
].value
);
506 max_pileup
= max_over_two_pv_array ( pileup1
, final_length1
, pileup2
, final_length2
, &final_length_max
);
507 printf( "max of pileup 1 and 2\n" );
508 for ( i
= 0; i
< final_length_max
; i
++ )
510 printf( "pos:%d value:%.2f\n", max_pileup
[i
].pos
, max_pileup
[i
].value
);
513 printf( "write to bedGraph\n" );
515 write_pv_array_to_bedGraph ( max_pileup
, final_length_max
, "chr1", "test.bdg", 0 );