modified: SpatialOmicsCoord.py
[GalaxyCodeBases.git] / c_cpp / etc / KaKs_Calculator / src / base.h
blobe6d72bbe11d980f18aed2a0a758b50ca6486de6f
1 /******************************************************************
2 * Copyright (c) 2006, BGI of Chinese Academy of Sciences
3 * All rights reserved.
5 * Filename: base.h
6 * Abstract: Declaration of base class for KaKs methods.
8 * Version: 1.0
9 * Author: Zhang Zhang (zhanghzhang@genomics.org.cn)
10 * Date: Feb.2, 2005
12 * Modified Version: 1.2
13 * Modified Author: ZZ
14 * Modified Date: Apr. 2006
15 ******************************************************************/
16 #pragma warning(disable:4786) //Disable warning when using vector
18 #if !defined(BASE_H)
19 #define BASE_H
21 #define VERSION "1.2, Apr. 2006"
23 #define CODONLENGTH 3 //Length of codon
24 #define DNASIZE 4 //A C G T
25 #define XSIZE DNASIZE*DNASIZE //Size of one group AXX (X=A,C,G,T)
26 #define CODON 64 //Codon Size
27 #define NULL 0 //Zero
28 #define NA -1 //Not Available
29 #define NCODE 23 //Number of genetic codes
30 #define NNCODE NCODE*2 //Double of the number genetic codes
31 #define SMALLVALUE 1e-6 //Value near to zero
32 #define NUMBER_OF_RATES 6 //Number of substitution rates
33 #define MODELCOUNT 14 //Number of candidate models
35 #define gammap(x,alpha) (alpha*(1-pow(x,-1/alpha)))
36 #define square(a) ((a)*(a))
38 #define min2(a,b) ((a)<(b)?(a):(b))
39 #define max2(a,b) ((a)>(b)?(a):(b))
40 #define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))
42 #include <string.h>
43 /* Stanard lib of C++ */
44 #include<string>
45 #include<iostream>
46 #include<sstream>
47 #include<fstream>
48 #include<vector>
49 #include<stdlib.h>
50 #include<math.h>
51 #include<time.h>
54 using namespace std;
56 /*****Global variables*****/
57 extern const char* transl_table[]; //Genetic codon table
58 extern int genetic_code; //ID of codon table from 1 to 23
60 extern string seq_name; //Pairwise sequences' name
61 extern unsigned long length; //Length of compared sequences
62 extern double GC[4]; //GC Content of entire sequences(GC[0]) and three codon positions (GC[1--3])
63 //End of Global variables
66 /* Convert one type to any other type */
67 template<class out_type,class in_value>
68 out_type CONVERT(const in_value & t) {
69 stringstream stream;
70 //Put the value 't' into the stream
71 stream<<t;
72 out_type result;
73 //Put the stream into the 'result'
74 stream>>result;
76 return result;
79 class Base {
81 public:
82 Base();
84 /* Write the content into file */
85 bool writeFile(string output_filename, const char* result);
87 /* Parse results */
88 string parseOutput();
89 /* Format string for outputing into file */
90 void addString(string &result, string str, string flag="\t");
92 /* Generate a radnom integer */
93 int getRandom();
95 /* Convert a char-T,C,A,G into a digit 0,1,2,3, respectively */
96 int convertChar(char ch);
97 /* Convert a digit-0,1,2,3 into a char T,C,A,G, respectively */
98 char convertInt(int ch);
99 /* Convert a string to uppercase */
100 string stringtoUpper(string str);
102 /* Calculate the amino acid of codon by codon */
103 char getAminoAcid(string codon);
104 /* Calculate the amino acid of codon by codon's id*/
105 char getAminoAcid(int id);
106 /* Get the number of stop codon in a given genetic code table */
107 int getNumNonsense(int genetic_code);
109 /* Return the codon's id from codon table */
110 int getID(string codon);
111 /* Return a codon according to the id */
112 string getCodon(int IDcodon);
114 /* Sum array's elements */
115 double sumArray(double x[], int end, int begin=0);
116 int sumArray(int x[], int end, int begin=0);
118 /* Init value to array */
119 int initArray(double x[], int n, double value=0.0);
120 int initArray(int x[], int n, int value=0);
122 /* Elements in array are mutipled by scale */
123 int scaleArray(double scale, double x[], int n);
124 /* Sqrt of the sum of the elements' square */
125 double norm(double x[], int n);
126 /* Copy array's values one by one: to[] = from[] */
127 int copyArray(double from[], double to[], int n);
128 /* Sum of 'n' products multiplied by two elements x[], y[] */
129 double innerp(double x[], double y[], int n);
130 /* Set x[i,j]=0 when x!=j and x[i,j]=1 when x=j */
131 int initIdentityMatrix(double x[], int n);
133 protected:
134 /* Compute p-value by Fisher exact test to justify the validity of ka/ks */
135 double fisher(double cs, double us, double cn, double un);
136 /* factorial */
137 double factorial(double n);
140 protected:
141 /* Name of method for calculating ka/ks */
142 string name;
143 /* Sysnonymous sites(S) and nonsysnonymous sites(N) */
144 double S, N;
145 /* Number of sysnonymous differences(Sd) and nonsysnonymous differences(Nd), snp=Sd+Nd */
146 double Sd, Nd, snp;
147 /* Number of sysnonymous substitutions(ks) and nonsysnonymous substitutions(ka) per site */
148 double Ka, Ks;
149 /* Standard Errors for Ka and Ks */
150 double SEKa, SEKs;
151 /* Transitional(Si) and transversional(Vi) substitutions */
152 double Si[5], Vi[5];
153 /* 0-fold, 2-fold, 4-fold */
154 double L[5];
155 /* Total Numbers of substitutions per i-th type site: K[i]=A[i]+B[i] */
156 double K[5];
157 /* T C A G
158 T - 6 7 8
159 C 0 - 9 10
160 A 1 3 - 11
161 G 2 4 5 - */
163 /* Transition/transversion rate ratio, tc: between pyrimidines, ag: between purines */
164 double kappa, kappatc, kappaag;
166 public:
167 /* Divergence time, substitutions per site: t = (S*Ks+N*Ka)/(S+N)*/
168 double t;
169 /* Maximum Likelihood value */
170 double lnL;
171 /* Akaike Information Criterion (AICc) */
172 double AICc;
173 /* Akaike Weight in Model Selection */
174 double AkaikeWeight;
175 /* Name of mutation model according to AICc */
176 string model;
177 /* Transition/transversion mutation ratio(s) or substitution rates */
178 double KAPPA[NUMBER_OF_RATES];
180 protected:
181 /* Store Maximum Likilhood results for Model Selection and Model Averaging */
182 struct MLResult {
183 string result; //estimated results
184 double AICc; //the value of a modified AIC
185 double freq[CODON]; //Codon frequency
186 double rate[NUMBER_OF_RATES]; //Six subsitution rates
187 double w; //Ka/Ks
188 double t; //divergence time
193 #endif