Merge pull request #589 from tjni/clean-up-build-dependencies
[MACS.git] / MACS3 / Signal / cPosValCalculation.c
blobca9997d3606c678c0602398213623672eb1a875a
1 /* Time-stamp: <2020-11-23 14:52:31 Tao Liu>
3 This code is free software; you can redistribute it and/or modify it
4 under the terms of the BSD License (see the file LICENSE included
5 with the distribution).
6 */
8 #include <stdio.h>
9 #include <stdlib.h>
11 #include "cPosValCalculation.h"
13 /* Simple compare function */
14 int cmpfunc_simple (const void * a, const void * b)
16 return ( *(int*)a - *(int*)b );
19 /* Fix coordinates
21 Input:
23 1. int * poss: must be sorted!
25 Return:
28 int * fix_coordinates ( int * poss, long l, int leftmost_coord, int rightmost_coord )
30 long i;
31 for ( i = 0; i < l; i++ )
33 if ( poss[ i ] < leftmost_coord )
34 poss[ i ] = leftmost_coord;
35 else
36 break;
39 for ( i = l-1; i > -1; i-- )
41 if ( poss[ i ] > rightmost_coord )
42 poss[ i ] = rightmost_coord;
43 else
44 break;
46 return poss;
49 /* Pileup function for single end data.
50 Input:
51 1. int * plus_tags: the 5' ends of tags aligned to plus strand. Start from 0.
52 2. long l_plus_tags: number of plus tags.
53 3. int * minus_tags: the 3' ends of tags aligned to minus strand. Start from 0.
54 4. long l_minus_tags: number of minus tags.
55 5. int five_shift: shift value at 5' end to recover the DNA fragment
56 6. int three_shift: shift value at 3' end to recover the DNA fragment
57 7. int leftmost_coord: any coords smaller than it will be set to it
58 8. int rightmost_coord: any coords larger than it will be set to it
59 9. float scale_factor: scale factor on pileup
60 10. float baseline_value: the minimum value of pileup
62 Return:
63 1. PosVal *: position-value pairs in bedGraph fashion.
64 2. long * final_length: the final length for the returned array of PosVal.
66 Example of usage:
68 pileup = single_end_pileup ( {1,2,3}, 3, {2,3,4}, 3, 0, 3, 0, 20, 0.5, 1.0, &final_length );
69 for ( i = 0; i < final_length; i++ )
71 printf( "pos:%d value:%.2f\n", pileup[i].pos, pileup[i].value );
75 struct PosVal * single_end_pileup( int * plus_tags, long l_plus_tags, int * minus_tags, long l_minus_tags, int five_shift, int three_shift, int leftmost_coord, int rightmost_coord, float scale_factor, float baseline_value, long * final_length )
77 long i_s, i_e, i;
78 int p, pre_p, pileup;
79 long l = l_plus_tags + l_minus_tags;
80 int * start_poss, * end_poss, * ptr_start_poss, * ptr_end_poss;
81 struct PosVal * pos_value_array;
83 start_poss = (int *) malloc( l * sizeof( int ) );
84 end_poss = (int *) malloc( l * sizeof( int ) );
86 ptr_start_poss = start_poss;
87 ptr_end_poss = end_poss;
89 for ( i = 0; i < l_plus_tags; i++ )
91 *ptr_start_poss = plus_tags[ i ] - five_shift; ptr_start_poss++;
92 *ptr_end_poss = plus_tags[ i ] + three_shift; ptr_end_poss++;
95 for ( i = 0; i < l_minus_tags; i++ )
97 *ptr_start_poss = minus_tags[ i ] - three_shift; ptr_start_poss++;
98 *ptr_end_poss = minus_tags[ i ] + five_shift; ptr_end_poss++;
101 qsort( start_poss, l, sizeof( int ), cmpfunc_simple );
102 qsort( end_poss, l, sizeof( int ), cmpfunc_simple );
104 // fix negative coordinations and those extends over end of chromosomes
105 start_poss = fix_coordinates ( start_poss, l, leftmost_coord, rightmost_coord );
106 end_poss = fix_coordinates ( end_poss, l, leftmost_coord, rightmost_coord );
108 pos_value_array = quick_pileup ( start_poss, end_poss, l, scale_factor, baseline_value, final_length );
110 // clean mem
111 free( start_poss );
112 free( end_poss );
113 return pos_value_array;
116 /* Assume start_poss and end_poss have been sorted and the coordinates have been fixed. */
117 struct PosVal * quick_pileup ( int * start_poss, int * end_poss, long length_poss, float scale_factor, float baseline_value, long * final_length )
119 long i_s, i_e, i, I;
120 int p, pre_p, pileup;
121 struct PosVal * pos_value_array, * ptr_pos_value_array;
122 int * ptr_start_poss, * ptr_end_poss;
123 long l = length_poss;
125 pos_value_array = (struct PosVal *) malloc ( 2 * l * sizeof( struct PosVal ) );
127 i_s = 0; i_e = 0;
129 ptr_pos_value_array = pos_value_array; ptr_start_poss = start_poss; ptr_end_poss = end_poss;
131 pileup = 0;
132 pre_p = min( *start_poss, *end_poss );
133 ptr_start_poss++; ptr_end_poss++;
135 I = 0;
136 if ( pre_p != 0 )
138 (*ptr_pos_value_array).pos = pre_p;
139 (*ptr_pos_value_array).value = max( 0, baseline_value );
140 ptr_pos_value_array++; I++;
143 ptr_start_poss = start_poss;
144 ptr_end_poss = end_poss;
146 while (i_s < l && i_e < l)
148 if ( *ptr_start_poss < *ptr_end_poss )
150 p = *ptr_start_poss;
151 if ( p != pre_p )
153 (*ptr_pos_value_array).pos = p;
154 (*ptr_pos_value_array).value = max( pileup * scale_factor, baseline_value );
155 ptr_pos_value_array++; I++;
156 pre_p = p;
158 pileup += 1;
159 i_s += 1;
160 ptr_start_poss++;
162 else if ( *ptr_start_poss > *ptr_end_poss )
164 p = *ptr_end_poss;
165 if ( p != pre_p )
167 (*ptr_pos_value_array).pos = p;
168 (*ptr_pos_value_array).value = max( pileup * scale_factor, baseline_value );
169 ptr_pos_value_array++; I++;
170 pre_p = p;
172 pileup -= 1;
173 i_e += 1;
174 ptr_end_poss++;
176 else
178 i_s += 1;
179 i_e += 1;
180 ptr_start_poss++;
181 ptr_end_poss++;
185 // add the rest of end positions.
186 if ( i_e < l )
188 for ( i = i_e; i < l; i++ )
190 p = *ptr_end_poss;
191 if ( p != pre_p )
193 (*ptr_pos_value_array).pos = p;
194 (*ptr_pos_value_array).value = max( pileup * scale_factor, baseline_value );
195 ptr_pos_value_array++; I++;
196 pre_p = p;
198 pileup -= 1;
199 ptr_end_poss++;
202 pos_value_array = (struct PosVal *) realloc ( pos_value_array, I * sizeof( struct PosVal ) );
203 *final_length = I; /* return the final length of pos_value_array */
204 return pos_value_array;
207 long quick_pileup_simple ( int * ret_poss, float * ret_values, int * start_poss, int * end_poss, long length_poss, float scale_factor, float baseline_value )
209 long i_s, i_e, i, I;
210 int p, pre_p, pileup;
211 int * ptr_ret_poss;
212 int * ptr_start_poss, * ptr_end_poss;
213 float * ptr_ret_values;
214 long l = length_poss;
216 ptr_ret_poss = ret_poss; ptr_ret_values = ret_values;
217 ptr_start_poss = start_poss; ptr_end_poss = end_poss;
219 i_s = 0; i_e = 0;
221 pileup = 0;
222 pre_p = min( *ptr_start_poss, *ptr_end_poss );
223 ptr_start_poss++; ptr_end_poss++;
225 I = 0;
226 if ( pre_p != 0 )
228 *ptr_ret_poss = pre_p;
229 *ptr_ret_values = max( 0, baseline_value );
230 ptr_ret_poss++; ptr_ret_values++; I++;
233 while (i_s < l && i_e < l)
235 if ( *ptr_start_poss < *ptr_end_poss )
237 p = *ptr_start_poss;
238 if ( p != pre_p )
240 *ptr_ret_poss = p;
241 *ptr_ret_values = max( pileup * scale_factor, baseline_value );
242 ptr_ret_poss++;ptr_ret_values++; I++;
243 pre_p = p;
245 pileup += 1;
246 i_s += 1;
247 ptr_start_poss++;
249 else if ( *ptr_start_poss > *ptr_end_poss )
251 p = *ptr_end_poss;
252 if ( p != pre_p )
254 *ptr_ret_poss = p;
255 *ptr_ret_values = max( pileup * scale_factor, baseline_value );
256 ptr_ret_poss++;ptr_ret_values++; I++;
257 pre_p = p;
259 pileup -= 1;
260 i_e += 1;
261 ptr_end_poss++;
263 else
265 i_s += 1;
266 i_e += 1;
267 ptr_start_poss++;
268 ptr_end_poss++;
272 // add the rest of end positions.
273 if ( i_e < l )
275 for ( i = i_e; i < l; i++ )
277 p = *ptr_end_poss;
278 if ( p != pre_p )
280 *ptr_ret_poss = p;
281 *ptr_ret_values = max( pileup * scale_factor, baseline_value );
282 ptr_ret_poss++;ptr_ret_values++; I++;
283 pre_p = p;
285 pileup -= 1;
286 ptr_end_poss++;
289 return I;
292 /* Calculate the maximum value between two sets of PosVal arrays (like bedGraph type of data) */
293 struct PosVal * max_over_two_pv_array ( struct PosVal * pva1, long l_pva1, struct PosVal * pva2, long l_pva2, long * final_length )
295 struct PosVal * ptr_pva1, * ptr_pva2;
296 struct PosVal * ret_pva, * ptr_ret_pva;
297 long i, i1, i2, I;
299 ptr_pva1 = pva1; ptr_pva2 = pva2;
300 ret_pva = ( struct PosVal * ) malloc ( ( l_pva1 + l_pva2 ) * sizeof( struct PosVal ) );
301 ptr_ret_pva = ret_pva;
303 i1 = i2 = 0;
305 I = 0;
307 while ( i1< l_pva1 && i2 < l_pva2 )
309 if ( (*ptr_pva1).pos < (*ptr_pva2).pos )
311 (*ptr_ret_pva).pos = (*ptr_pva1).pos;
312 (*ptr_ret_pva).value = max( (*ptr_pva1).value, (*ptr_pva2).value );
313 ptr_ret_pva++;I++;
314 ptr_pva1++; i1++;
316 else if ( (*ptr_pva1).pos > (*ptr_pva2).pos )
318 (*ptr_ret_pva).pos = (*ptr_pva2).pos;
319 (*ptr_ret_pva).value = max( (*ptr_pva1).value, (*ptr_pva2).value );
320 ptr_ret_pva++;I++;
321 ptr_pva2++; i2++;
323 else // (*ptr_pva1).pos == (*ptr_pva2).pos
325 (*ptr_ret_pva).pos = (*ptr_pva1).pos;
326 (*ptr_ret_pva).value = max( (*ptr_pva1).value, (*ptr_pva2).value );
327 ptr_ret_pva++;I++;
328 ptr_pva1++; i1++;
329 ptr_pva2++; i2++;
333 *final_length = I;
334 return ret_pva;
337 /* Calculate using specified function between two sets of PosVal arrays (like bedGraph type of data) */
338 struct PosVal * apply_func_two_pv_array ( float (*func)(float, float), struct PosVal * pva1, long l_pva1, struct PosVal * pva2, long l_pva2, long * final_length )
340 struct PosVal * ptr_pva1, * ptr_pva2;
341 struct PosVal * ret_pva, * ptr_ret_pva;
342 long i, i1, i2, I;
344 ptr_pva1 = pva1; ptr_pva2 = pva2;
345 ret_pva = ( struct PosVal * ) malloc ( ( l_pva1 + l_pva2 ) * sizeof( struct PosVal ) );
346 ptr_ret_pva = ret_pva;
348 i1 = i2 = 0;
350 I = 0;
352 while ( i1< l_pva1 && i2 < l_pva2 )
354 if ( (*ptr_pva1).pos < (*ptr_pva2).pos )
356 (*ptr_ret_pva).pos = (*ptr_pva1).pos;
357 (*ptr_ret_pva).value = (*func)( (*ptr_pva1).value, (*ptr_pva2).value );
358 ptr_ret_pva++;I++;
359 ptr_pva1++; i1++;
361 else if ( (*ptr_pva1).pos > (*ptr_pva2).pos )
363 (*ptr_ret_pva).pos = (*ptr_pva2).pos;
364 (*ptr_ret_pva).value = (*func)( (*ptr_pva1).value, (*ptr_pva2).value );
365 ptr_ret_pva++;I++;
366 ptr_pva2++; i2++;
368 else // (*ptr_pva1).pos == (*ptr_pva2).pos
370 (*ptr_ret_pva).pos = (*ptr_pva1).pos;
371 (*ptr_ret_pva).value = (*func)( (*ptr_pva1).value, (*ptr_pva2).value );
372 ptr_ret_pva++;I++;
373 ptr_pva1++; i1++;
374 ptr_pva2++; i2++;
378 *final_length = I;
379 return ret_pva;
382 /* Align two PosVal arrays according to their overlaps */
383 struct PosValVal * align_two_pv_array ( struct PosVal * pva1, long l_pva1, struct PosVal * pva2, long l_pva2, long * final_length )
385 struct PosVal * ptr_pva1, * ptr_pva2;
386 struct PosValVal * ret_pvva, * ptr_ret_pvva;
387 long i, i1, i2, I;
389 ptr_pva1 = pva1; ptr_pva2 = pva2;
390 ret_pvva = ( struct PosValVal * ) malloc ( ( l_pva1 + l_pva2 ) * sizeof( struct PosValVal ) );
391 ptr_ret_pvva = ret_pvva;
393 i1 = i2 = 0;
395 I = 0;
397 while ( i1< l_pva1 && i2 < l_pva2 )
399 if ( (*ptr_pva1).pos < (*ptr_pva2).pos )
401 (*ptr_ret_pvva).pos = (*ptr_pva1).pos;
402 (*ptr_ret_pvva).value1 = (*ptr_pva1).value;
403 (*ptr_ret_pvva).value2 = (*ptr_pva2).value;
404 ptr_ret_pvva++;I++;
405 ptr_pva1++; i1++;
407 else if ( (*ptr_pva1).pos > (*ptr_pva2).pos )
409 (*ptr_ret_pvva).pos = (*ptr_pva2).pos;
410 (*ptr_ret_pvva).value1 = (*ptr_pva1).value;
411 (*ptr_ret_pvva).value2 = (*ptr_pva2).value;
412 ptr_ret_pvva++;I++;
413 ptr_pva2++; i2++;
415 else // (*ptr_pva1).pos == (*ptr_pva2).pos
417 (*ptr_ret_pvva).pos = (*ptr_pva1).pos;
418 (*ptr_ret_pvva).value1 = (*ptr_pva1).value;
419 (*ptr_ret_pvva).value2 = (*ptr_pva2).value;
420 ptr_ret_pvva++;I++;
421 ptr_pva1++; i1++;
422 ptr_pva2++; i2++;
426 *final_length = I;
427 return ret_pvva;
430 /* Write pos-value array to a bedGraph file. If append is non-zero then just add content to the existing file. */
431 void write_pv_array_to_bedGraph ( struct PosVal * pv_array, long l_pv_array, char * chromosome, char * bdgfile, short append )
433 int pre_s, pre_e;
434 float pre_v;
435 long i;
436 FILE * fp;
438 if ( append > 0 )
439 fp = fopen ( bdgfile, "a" );
440 else
441 fp = fopen ( bdgfile, "w" );
443 pre_s = 0;
445 pre_e = (*pv_array).pos;
446 pre_v = (*pv_array).value;
448 pv_array += 1;
450 for ( i = 1; i < l_pv_array; i++ )
452 if ( (*pv_array).value != pre_v )
454 fprintf ( fp, "%s\t%d\t%d\t%.5f\n", chromosome, pre_s, pre_e, pre_v );
455 pre_s = pre_e;
456 pre_e = (*pv_array).pos;
457 pre_v = (*pv_array).value;
459 else
461 pre_e = (*pv_array).pos;
463 pv_array ++;
466 /* last piece */
467 fprintf ( fp, "%s\t%d\t%d\t%.5f\n", chromosome, pre_s, pre_e, pre_v );
469 fclose( fp );
472 /* for testing */
473 int main()
475 int i;
476 int five_shift = 0;
477 int three_shift = 2;
478 int p_array1[3] = {11,12,13};
479 int p_array2[3] = {12,13,14};
480 int m_array1[3] = {12,13,14};
481 int m_array2[3] = {13,14,15};
482 int leftmost_coord = 0;
483 int rightmost_coord = 20;
484 float scale_factor = 0.5;
485 long final_length1 = 0;
486 long final_length2 = 0;
487 long final_length_max = 0;
488 struct PosVal * pileup1;
489 struct PosVal * pileup2;
490 struct PosVal * max_pileup;
492 pileup1 = single_end_pileup ( p_array1, 3, m_array1, 3, five_shift, three_shift, leftmost_coord, rightmost_coord, scale_factor, 0, &final_length1 );
493 pileup2 = single_end_pileup ( p_array2, 3, m_array2, 3, five_shift, three_shift, leftmost_coord, rightmost_coord, scale_factor, 0, &final_length2 );
495 printf( "pileup 1\n" );
496 for ( i = 0; i < final_length1; i++ )
498 printf( "pos:%d value:%.2f\n", pileup1[i].pos, pileup1[i].value );
500 printf( "pileup 2\n" );
501 for ( i = 0; i < final_length2; i++ )
503 printf( "pos:%d value:%.2f\n", pileup2[i].pos, pileup2[i].value );
506 max_pileup = max_over_two_pv_array ( pileup1, final_length1, pileup2, final_length2, &final_length_max );
507 printf( "max of pileup 1 and 2\n" );
508 for ( i = 0; i < final_length_max; i++ )
510 printf( "pos:%d value:%.2f\n", max_pileup[i].pos, max_pileup[i].value );
513 printf( "write to bedGraph\n" );
515 write_pv_array_to_bedGraph ( max_pileup, final_length_max, "chr1", "test.bdg", 0 );