modified: nfig1.py
[GalaxyCodeBases.git] / BGI / SOAPsnp / SfsMethod.h
blob8816e97a7c0be42c21aac71faf2da666944d99f7
1 /*
2 ******************************************************************************
3 *Copyright 2010
4 * BGI-SHENZHEN
5 *All Rights Reserved
6 *ATHUOR : Bill Tang
7 *CREATE DATE : 2010-9-13
8 *CLASS NAME: SfsMethod
9 *FUNCTION : a class that contain method about sfs.
10 *FILE NAME : SfsMethod.h
11 *UPDATE DATE : 2010-9-13
12 *UPDATE BY : Bill Tang
13 *UPDATE DATE : 2010-10-13
14 *UPDATE BY : Bill Tang
15 *******************************************************************************
18 #pragma once
20 #include <map>
21 #include <vector>
22 #include <cmath>
23 #include <cstring>
24 #include "assert.h"
25 #include <iostream>
26 #include <fstream>
27 #include <pthread.h>
28 #include <semaphore.h>
29 //#include <string.h>
31 using namespace std;
33 class Files;
34 class Parameter;
35 class Pos_info;
36 class Prob_matrix;
38 /*soaplikelihood to sfslikelihood*/
39 const int sfs_type[10]={0,1,3,2,5,7,6,15,11,10}; // AA,AC,AG,AT,CC,CG,CT,GG,GT,TT
41 typedef struct _loci{
42 string chromo;
43 int position;
44 /* bool operator <(const _loci lc2) const
46 if (chromo == lc2.chromo)
47 return position < lc2.position;
48 else
49 return chromo < lc2.chromo;
52 }loci;
54 struct loci_hash
56 size_t operator()(const loci& lc) const
58 return size_t(lc.chromo + lc.position);
60 };*/
63 This struct is all the data we need to have for each site for all individuals
66 typedef struct {
67 int* locus; //4 times number of individuals {A,C,T,G} +4 the last 4 will be the sum
68 //unsigned char *lk; //3 times the number of individuals {AA,Aa,aa}
69 int *lk; //10 times the number of individuals {AA,Aa,aa}
70 short int major; //either ACGT {0...3} a
71 short int minor; //either ACGT {0...3} A
72 int ref;
73 //it might be overzealous to keep this in the datum...
74 //but it might prove handy for debug purposes
75 double phat;
76 // int numHits; //counter for keeping track of which files has data for a loci
77 // int position; // allele's position.
78 // std::string chromo; // the chromone ID.
79 // bool is_be_record; // record that if the pos have bean in.
80 }datum;
82 //comparison operator used when comparing loci
83 struct cmp_loci {
84 bool operator()(const loci& first,const loci& second) const {
85 if(first.chromo==second.chromo)
87 return first.position<second.position;
89 else
91 return first.chromo<second.chromo;
96 struct cmp_loci {
97 bool operator()(const loci& first,const loci& second) const {
98 return ((first.chromo == second.chromo) && (first.position == second.position));
102 struct cmp_char {
103 bool operator()(const char* first,const char* second) const {
104 int tmp = std::strcmp(first, second);
105 return tmp<0;
109 //typedef boost::unordered_map<loci, datum, loci_hash, cmp_loci> aMap; // mapping of the reads per locus
110 typedef std::map<loci, datum, cmp_loci> aMap; // mapping of the reads per locus
111 typedef std::map<char *,int,cmp_char> cMap; // mapping of the chromoname -> int id
112 typedef datum aVector; // the vector that store the datum.
114 class SfsMethod
116 public:
117 SfsMethod();
118 SfsMethod(int numInds);
119 virtual ~SfsMethod(void);
120 // the main function for the sfs, update 11-29 add int sfsfirstlast
121 virtual void algo(aMap & asso, int numInds, double eps, double pvar, double B, FILE * sfsfile, int normalize, int alternative, int singleMinor, double pThres, int allowPhatZero, int firstlast);
122 virtual void algoJoint(aMap & asso, int numInds, double eps, double pvar, double B, FILE * sfsfile, int underFlowProtect, int alternative, int singleMinor, int fold);
123 // write the freqfile
124 void writeFreq(FILE * pFile, int numInds, aMap & asso);
125 // allocation functions, this shouldn't look magical for anyone
126 virtual double* allocDoubleArray(int len);
127 //update 11-16
128 sem_t sem_call_sfs;
129 //sem_t sem_map_change;
130 sem_t sem_call_sfs_return ;
131 //sem_t sem_map_number;
132 int file_end_flag;
133 private:
134 // genome likelihood Format look up table
135 int glfLookup[4][4];
136 //used for offsetting each frame in the array of loglikeratios for a site
137 enum {AA = 0, Aa = 1, aa = 2};
138 //used for offsetting each frame in the array of basereads for a site
139 enum {A = 0, C = 1, G = 2, T = 3};
140 //to speed up calculations, we will generate a likelookup table for all values 0-255.
141 double likeLookup[256];
142 //The index will then be the value from one of the columns in the glf file
143 double *binomLookup;
144 //this is the result array
145 double *bayes;
146 double *likes;
147 // aMap pointer.
148 aMap asso[3];
149 //the map index
150 int m_map_idx;
151 int m_map_idx_process; // the index of the process map
152 // aVector *asso;
153 // individual number
154 int m_numInds;
155 // the chr index map.
156 cMap m_faiIndex;
157 // the thread synchronization lock.
158 static pthread_mutex_t m_pthreadMutex;
159 public:
160 // test for infinity
161 int isinf_local(double x);
162 // test for a NaN
163 int isnan_local(double x);
164 // add up all the value form the ary
165 double sum(const double* ary, int len);
166 // init the like look up table.
167 void generate_likeLookup();
168 // count ratio
169 double calcLikeRatio(int a)
171 return (pow(10.0,-a/10.0));
173 // get Max id in the array d.
174 int getMaxId(double *d,int len);
175 // input should be number of individuals
176 void generate_binom(int k);
177 // FUNCTION: generate factln
178 double factln(int n);
179 // To calculate the nCk ( n combination k)
180 double bico(int n, int k);
181 // generate gammln
182 double gammln(double xx);
183 // clean map
184 void cleanUpMap(aMap& asso);
185 // get char from int
186 char getChar(int i);
187 // add protect
188 double addProtect(double *ary,int len);
189 // add three protect
190 double addProtect3(double a,double b, double c);
191 // add two protect
192 double addProtect2(double a,double b);
193 // print array
194 void print_array(double *ary,int len);
195 // alloc datum memery.
196 datum allocDatum(int numInds);
197 // alloc array memery.
198 int* allocIntArray(int len);
199 // will calculate the sums and input the values in the last 4 slots, and set the minor, major accordingly
200 void calcSumBias(aMap & asso,int numInds);
201 // FUNCTION: soaplikelihood to sfslikelihood
202 void soaplk_sfslk(int *likelihood, double *type_likely, int id);
203 //copy source's coverage to the dest
204 void cov_Cpy(int *dest, int const *source, int id);
205 // alloc map memery
206 virtual void allocVec(void);
207 // free the map memery
208 virtual void delMap(aMap &amap);
209 // do SFS
210 int call_SFS(Parameter* para, Files* files);
211 // get the data what sfs want
212 int getMapData(const Pos_info& site, const Prob_matrix* mat, const std::string & chr, const int id);
213 // initialization
214 int init(const int numInds);
215 // This builds a map from: char* -> int
216 virtual void buildMap(const char* fname);
217 // free the vector's data memery
218 virtual void delVector(aVector * avec);
219 // get data what sfs want
220 virtual int getSFSData(const Pos_info & site, const Prob_matrix * mat, const std::string & chr, const int id);
221 // clean up the vector's member.
222 void cleanVec(void);
223 //update 11-16
224 // change map
225 void mapChange(void);
226 //get the map index can be process
227 int getidxProcess(void);
228 //wait the map to be process
229 void setidxProcess(void);
232 //the call sfs structor
233 typedef struct _big_call_sfs_args
235 SfsMethod * sfsMethod;
236 Files *files;
237 Parameter * para;
239 inline _big_call_sfs_args(SfsMethod * a, Files * b, Parameter * c)
241 sfsMethod = a;
242 files = b;
243 para = c;
246 inline _big_call_sfs_args()
248 sfsMethod = NULL;
249 files = NULL;
250 para = NULL;
253 }BIG_CALL_SFS_ARGS;
255 void *_sfsMethod_callsfs(void * args);