2 ******************************************************************************
7 *CREATE DATE : 2010-9-13
9 *FUNCTION : a class that contain method about sfs.
10 *FILE NAME : SfsMethod.h
11 *UPDATE DATE : 2010-9-13
12 *UPDATE BY : Bill Tang
13 *UPDATE DATE : 2010-10-13
14 *UPDATE BY : Bill Tang
15 *******************************************************************************
28 #include <semaphore.h>
38 /*soaplikelihood to sfslikelihood*/
39 const int sfs_type
[10]={0,1,3,2,5,7,6,15,11,10}; // AA,AC,AG,AT,CC,CG,CT,GG,GT,TT
44 /* bool operator <(const _loci lc2) const
46 if (chromo == lc2.chromo)
47 return position < lc2.position;
49 return chromo < lc2.chromo;
56 size_t operator()(const loci& lc) const
58 return size_t(lc.chromo + lc.position);
63 This struct is all the data we need to have for each site for all individuals
67 int* locus
; //4 times number of individuals {A,C,T,G} +4 the last 4 will be the sum
68 //unsigned char *lk; //3 times the number of individuals {AA,Aa,aa}
69 int *lk
; //10 times the number of individuals {AA,Aa,aa}
70 short int major
; //either ACGT {0...3} a
71 short int minor
; //either ACGT {0...3} A
73 //it might be overzealous to keep this in the datum...
74 //but it might prove handy for debug purposes
76 // int numHits; //counter for keeping track of which files has data for a loci
77 // int position; // allele's position.
78 // std::string chromo; // the chromone ID.
79 // bool is_be_record; // record that if the pos have bean in.
82 //comparison operator used when comparing loci
84 bool operator()(const loci
& first
,const loci
& second
) const {
85 if(first
.chromo
==second
.chromo
)
87 return first
.position
<second
.position
;
91 return first
.chromo
<second
.chromo
;
97 bool operator()(const loci& first,const loci& second) const {
98 return ((first.chromo == second.chromo) && (first.position == second.position));
103 bool operator()(const char* first
,const char* second
) const {
104 int tmp
= std::strcmp(first
, second
);
109 //typedef boost::unordered_map<loci, datum, loci_hash, cmp_loci> aMap; // mapping of the reads per locus
110 typedef std::map
<loci
, datum
, cmp_loci
> aMap
; // mapping of the reads per locus
111 typedef std::map
<char *,int,cmp_char
> cMap
; // mapping of the chromoname -> int id
112 typedef datum aVector
; // the vector that store the datum.
118 SfsMethod(int numInds
);
119 virtual ~SfsMethod(void);
120 // the main function for the sfs, update 11-29 add int sfsfirstlast
121 virtual void algo(aMap
& asso
, int numInds
, double eps
, double pvar
, double B
, FILE * sfsfile
, int normalize
, int alternative
, int singleMinor
, double pThres
, int allowPhatZero
, int firstlast
);
122 virtual void algoJoint(aMap
& asso
, int numInds
, double eps
, double pvar
, double B
, FILE * sfsfile
, int underFlowProtect
, int alternative
, int singleMinor
, int fold
);
123 // write the freqfile
124 void writeFreq(FILE * pFile
, int numInds
, aMap
& asso
);
125 // allocation functions, this shouldn't look magical for anyone
126 virtual double* allocDoubleArray(int len
);
129 //sem_t sem_map_change;
130 sem_t sem_call_sfs_return
;
131 //sem_t sem_map_number;
134 // genome likelihood Format look up table
136 //used for offsetting each frame in the array of loglikeratios for a site
137 enum {AA
= 0, Aa
= 1, aa
= 2};
138 //used for offsetting each frame in the array of basereads for a site
139 enum {A
= 0, C
= 1, G
= 2, T
= 3};
140 //to speed up calculations, we will generate a likelookup table for all values 0-255.
141 double likeLookup
[256];
142 //The index will then be the value from one of the columns in the glf file
144 //this is the result array
151 int m_map_idx_process
; // the index of the process map
155 // the chr index map.
157 // the thread synchronization lock.
158 static pthread_mutex_t m_pthreadMutex
;
161 int isinf_local(double x
);
163 int isnan_local(double x
);
164 // add up all the value form the ary
165 double sum(const double* ary
, int len
);
166 // init the like look up table.
167 void generate_likeLookup();
169 double calcLikeRatio(int a
)
171 return (pow(10.0,-a
/10.0));
173 // get Max id in the array d.
174 int getMaxId(double *d
,int len
);
175 // input should be number of individuals
176 void generate_binom(int k
);
177 // FUNCTION: generate factln
178 double factln(int n
);
179 // To calculate the nCk ( n combination k)
180 double bico(int n
, int k
);
182 double gammln(double xx
);
184 void cleanUpMap(aMap
& asso
);
188 double addProtect(double *ary
,int len
);
190 double addProtect3(double a
,double b
, double c
);
192 double addProtect2(double a
,double b
);
194 void print_array(double *ary
,int len
);
195 // alloc datum memery.
196 datum
allocDatum(int numInds
);
197 // alloc array memery.
198 int* allocIntArray(int len
);
199 // will calculate the sums and input the values in the last 4 slots, and set the minor, major accordingly
200 void calcSumBias(aMap
& asso
,int numInds
);
201 // FUNCTION: soaplikelihood to sfslikelihood
202 void soaplk_sfslk(int *likelihood
, double *type_likely
, int id
);
203 //copy source's coverage to the dest
204 void cov_Cpy(int *dest
, int const *source
, int id
);
206 virtual void allocVec(void);
207 // free the map memery
208 virtual void delMap(aMap
&amap
);
210 int call_SFS(Parameter
* para
, Files
* files
);
211 // get the data what sfs want
212 int getMapData(const Pos_info
& site
, const Prob_matrix
* mat
, const std::string
& chr
, const int id
);
214 int init(const int numInds
);
215 // This builds a map from: char* -> int
216 virtual void buildMap(const char* fname
);
217 // free the vector's data memery
218 virtual void delVector(aVector
* avec
);
219 // get data what sfs want
220 virtual int getSFSData(const Pos_info
& site
, const Prob_matrix
* mat
, const std::string
& chr
, const int id
);
221 // clean up the vector's member.
225 void mapChange(void);
226 //get the map index can be process
227 int getidxProcess(void);
228 //wait the map to be process
229 void setidxProcess(void);
232 //the call sfs structor
233 typedef struct _big_call_sfs_args
235 SfsMethod
* sfsMethod
;
239 inline _big_call_sfs_args(SfsMethod
* a
, Files
* b
, Parameter
* c
)
246 inline _big_call_sfs_args()
255 void *_sfsMethod_callsfs(void * args
);