c_cpp/etc/KaKs_Calculator/src/YN00.cpp

   1 /************************************************************
   2 * Copyright (C) 2005, BGI of Chinese Academy of Sciences
   3 * All rights reserved.
   4
   5 * Filename: YN00.cpp
   6 * Abstract: Defination of YN00 class.
   7
   8 * Version: 1.0
   9 * Author: Zhang Zhang (zhanghzhang@genomics.org.cn)
  10 * Date: Jan.31, 2005
  11
  12 * Modified Version:
  13 * Modified Author:
  14 * Modified Date:
  15
  16   Note: Source codes are adapted from yn00.c in PAML.
  17
  18   Reference:
  19   Yang Z, Nielsen R  (2000)  Estimating Synonymous and
  20   Nonsynonymous Substitution Rates Under Realistic
  21   Evolutionary Models. Mol Biol Evol 17:32-43.
  22 *************************************************************/
  23
  24 #include "YN00.h"
  25
  26
  27 YN00::YN00() {
  28
  29         name = "YN";
  30         initArray(f12pos, 12);
  31         initArray(pi, 64);
  32         iteration = 1;
  33 }
  34
  35 void YN00::getFreqency(const string seq1, const string seq2) {
  36
  37         int i;
  38         double fstop=0.0;
  39
  40         //Get A,C,G,T frequency at three positions
  41         for(i=0; i<seq1.length(); i++) {
  42                 f12pos[(i%3)*4+convertChar(seq1[i])]++;
  43                 f12pos[(i%3)*4+convertChar(seq2[i])]++;
  44         }
  45         for(i=0; i<CODONFREQ; i++)
  46                 f12pos[i]/=(seq1.length()+seq2.length())/3;
  47
  48         //Get 64 amino acid probability
  49         for(i=0; i<CODON; i++) {
  50                 pi[i] = f12pos[i/16] * f12pos[4+(i%16)/4] * f12pos[8+i%4];
  51                 if (getAminoAcid(i)=='!') {
  52                         fstop+=pi[i];
  53                         pi[i]=0;
  54                 }
  55         }
  56         //Scale the sum of pi[] to 1
  57         for(i=0; i<CODON; i++)
  58                 pi[i]/=(1.0-fstop);
  59
  60         if (fabs(1-sumArray(pi,CODON))>1e-6)
  61                 cout<<"Warning: error in get codon freqency."<<endl;
  62
  63         for(i=0,npi0=0; i<CODON; i++)
  64                 if(pi[i])
  65                         pi_sqrt[npi0++]=sqrt(pi[i]);
  66
  67         npi0=CODON-npi0;
  68
  69 }
  70
  71
  72 /* Estimate kappa using the fourfold degenerate sites at third codon positions and nondegenerate sites */
  73 int YN00::GetKappa(const string seq1, const string seq2) {
  74
  75         int i,j,k,h,pos,c[2],aa[2],b[2][3],nondeg,fourdeg,by[3]={16,4,1};
  76         double ka[2], F[2][XSIZE],S[2],wk[2], T,V, pi4[4];
  77         double kdefault=2, nullValue=NULL, t;
  78
  79         for(k=0; k<2; k++)
  80                 initArray(F[k],16);
  81
  82         //Get Pi[] of A,C,G,T
  83         for(h=0; h<seq1.length(); h+=3) {
  84
  85                 //c[]: amino acid(0--63)
  86                 c[0]=getID(seq1.substr(h,3));
  87                 c[1]=getID(seq2.substr(h,3));
  88                 //aa[ ]: amino acid
  89                 aa[0]=getAminoAcid(c[0]);
  90                 aa[1]=getAminoAcid(c[1]);
  91                 //b[][]: 0--3
  92                 for(j=0; j<3; j++) {
  93                         b[0][j] = convertChar(seq1[h+j]);
  94                         b[1][j] = convertChar(seq2[h+j]);
  95                 }
  96
  97                 //Find non-degenerate sites
  98                 for(pos=0; pos<3; pos++) {
  99                         for(k=0,nondeg=0; k<2; k++) {
 100                                 for(i=0; i<4; i++) {
 101                                         if(i!=b[k][pos])
 102                                                 if (getAminoAcid(c[k]+(i-b[k][pos])*by[pos])==aa[k])
 103                                                         break;
 104                                 }
 105                                 if (i==4)
 106                                         nondeg++;
 107                         }
 108                         //F[0][]: 0-fold
 109                         if(nondeg==2) {
 110                                 F[0][b[0][pos]*4+b[1][pos]]+=.5;
 111                                 F[0][b[1][pos]*4+b[0][pos]]+=.5;
 112                         }
 113                 }
 114
 115                 //Find 4-fold degenerate sites at 3rd position
 116                 for(k=0,fourdeg=0;k<2;k++) {
 117                         for(j=0,i=c[k]-b[k][2]; j<4; j++)
 118                                 if(j!=b[k][2] && getAminoAcid(i+j)!=aa[k])
 119                                         break;
 120                         if(aa[0]==aa[1] && j==4)
 121                                 fourdeg++;
 122                 }
 123                 //F[1][]: 4-fold
 124                 if (fourdeg==2) {
 125                         F[1][b[0][2]*4+b[1][2]]+=.5;
 126                         F[1][b[1][2]*4+b[0][2]]+=.5;
 127                 }
 128
 129         }//end of for(h)
 130
 131         for(k=0; k<2; k++) {  /* two kinds of sites */
 132
 133                 S[k] = sumArray(F[k],16);
 134                 if(S[k]<=0) {
 135                         wk[k] = 0;
 136                         continue;
 137                 }
 138                 for(j=0; j<16; j++)
 139                         F[k][j]/=S[k];
 140
 141                 //Transition
 142                 T = (F[k][0*4+1]+F[k][2*4+3])*2;
 143                 //Tranversion
 144                 V = 1 - T - (F[k][0*4+0]+F[k][1*4+1]+F[k][2*4+2]+F[k][3*4+3]);
 145
 146                 //pi[]: the sum probabilty of T, C, A, G, respectively
 147                 for(j=0; j<4; j++) {
 148                         pi4[j] = sumArray(F[k]+j*4,4);
 149                 }
 150
 151                 //Correct kappa
 152                 DistanceF84(S[k],T,V,pi4, ka[k], t, nullValue);
 153                 wk[k]=(ka[k]>0?S[k]:0);
 154         }
 155
 156         if(wk[0]+wk[1]==0) {
 157                 kappa=kdefault;
 158         }
 159         else {
 160                 kappa=(ka[0]*wk[0]+ka[1]*wk[1])/(wk[0]+wk[1]);
 161         }
 162
 163         KAPPA[0] = KAPPA[1] = kappa;
 164
 165         return 0;
 166 }
 167
 168
 169 /* Correct for multiple substitutions */
 170 int YN00::DistanceF84(double n, double P, double Q, double pi4[], double &k_HKY, double &t, double &SEt) {
 171
 172         int failF84=0,failK80=0,failJC69=0;
 173         double tc,ag, Y,R, a=0,b=0, A,B,C, k_F84;
 174         double Qsmall=min(1e-10,0.1/n), maxkappa=2,maxt=99;
 175
 176         k_HKY=-1;
 177
 178         Y=pi4[0]+pi4[1];
 179         R=pi4[2]+pi4[3];
 180
 181         tc=pi4[0]*pi4[1];
 182         ag=pi4[2]*pi4[3];
 183
 184         if (P+Q>1) {
 185                 t=maxt;
 186                 k_HKY=1;
 187                 return 3;
 188         }
 189         if (P<-1e-10 || Q<-1e-10 || fabs(Y+R-1)>1e-8) {
 190                 return 3;
 191         }
 192
 193         //HKY85
 194         if(Q<Qsmall)
 195                 failF84=failK80=1;
 196         else if (Y<=0 || R<=0 || (tc<=0 && ag<=0))
 197                 failF84=1;
 198         else {
 199                 A=tc/Y+ag/R; B=tc+ag; C=Y*R;
 200                 a=(2*B+2*(tc*R/Y+ag*Y/R)*(1-Q/(2*C)) - P) / (2*A);
 201                 b=1-Q/(2*C);
 202                 if (a<=0 || b<=0)
 203                         failF84=1;
 204         }
 205
 206         if (!failF84) {
 207                 a=-.5*log(a); b=-.5*log(b);
 208                 if(b<=0)
 209                         failF84=1;
 210                 else {
 211                         k_F84=a/b-1;
 212                         t = 4*b*(tc*(1+ k_F84/Y)+ag*(1+ k_F84/R)+C);
 213                         k_HKY = (B + (tc/Y+ag/R)* k_F84)/B; /* k_F84=>k_HKY85 */
 214
 215                         //Standard errors
 216                         a = A*C/(A*C-C*P/2-(A-B)*Q/2);
 217                         b = A*(A-B)/(A*C-C*P/2-(A-B)*Q/2) - (A-B-C)/(C-Q/2);
 218                         SEt = sqrt((a*a*P+b*b*Q-square(a*P+b*Q))/n);
 219                 }
 220         }
 221
 222         //K80
 223         if(failF84 && !failK80) {  /* try K80 */
 224                 a=1-2*P-Q;  b=1-2*Q;
 225                 if (a<=0 || b<=0)
 226                         failK80=1;
 227                 else {
 228                         a=-log(a); b=-log(b);
 229                         if(b<=0)
 230                                 failK80=1;
 231                         else {
 232                                 k_HKY=(.5*a-.25*b)/(.25*b);
 233                                 t = .5*a+.25*b;
 234                         }
 235                         if(SEt) {
 236                                 a=1/(1-2*P-Q); b=(a+1/(1-2*Q))/2;
 237                                 SEt = sqrt((a*a*P+b*b*Q-square(a*P+b*Q))/n);
 238                         }
 239                 }
 240         }
 241
 242         if(failK80) {/* try JC69 */
 243                 if((P+=Q)>=.75) {
 244                         failJC69=1;
 245                         P=.75*(n-1.)/n;
 246                 }
 247                 t = -.75*log(1-P*4/3.);
 248
 249                 if(t>99)
 250                         t=maxt;
 251                 if(SEt) {
 252                         SEt = sqrt(9*P*(1-P)/n) / (3-4*P);
 253                 }
 254         }
 255
 256         if(k_HKY>99)
 257                 k_HKY=maxkappa;
 258
 259         return(failF84 + failK80 + failJC69);
 260 }
 261
 262 int YN00::DistanceYN00(const string seq1, const string seq2, double &dS,double &dN, double &SEKs, double &SEKa) {
 263
 264         int j,k,ir,nround=10, status=0;
 265         double fbS[4], fbN[4], fbSt[4], fbNt[4], St, Nt, Sdts, Sdtv, Ndts, Ndtv, k_HKY;
 266         double w0=0, dS0=0, dN0=0, accu=5e-4, minomega=1e-5, maxomega=99;
 267         double PMatrix[CODON*CODON];
 268
 269         if(t==0)
 270                 t=.5;
 271         if(omega<=0)
 272                 omega=1;
 273         for(k=0; k<4; k++)
 274                 fbS[k]=fbN[k]=0;
 275
 276         //Count sites of sequence 1
 277         S=N=0;
 278         CountSites(seq1, St, Nt, fbSt, fbNt);
 279         S+=St/2;
 280         N+=Nt/2;
 281         for(j=0; j<4; j++) {
 282                 fbS[j]+=fbSt[j]/2;
 283                 fbN[j]+=fbNt[j]/2;
 284         }
 285         //Count sites of sequence 2
 286         CountSites(seq2, St, Nt, fbSt, fbNt);
 287         S+=St/2;
 288         N+=Nt/2;
 289         for(j=0; j<4; j++) {
 290                 fbS[j]+=fbSt[j]/2;
 291                 fbN[j]+=fbNt[j]/2;
 292         }
 293
 294         if (t<0.001 || t>5)
 295                 t=0.5;
 296         if (omega<0.01 || omega>5)
 297                 omega=.5;
 298
 299
 300         for (ir=0; ir<(iteration?nround:1); ir++) {   /* iteration */
 301                 if(iteration)
 302                         GetPMatCodon(PMatrix,kappa,omega);
 303                 else
 304                         for(j=0; j<CODON*CODON; j++) {
 305                                 PMatrix[j]=1;
 306                         }
 307
 308                 CountDiffs(seq1,seq2, Sdts, Sdtv, Ndts, Ndtv, PMatrix);
 309
 310                 Sd = Sdts + Sdtv;
 311                 Nd = Ndts + Ndtv;
 312
 313                 //synonymous
 314                 DistanceF84(S, Sdts/S, Sdtv/S, fbS, k_HKY, dS, SEKs);
 315                 //nonsynonymous
 316                 DistanceF84(N, Ndts/N, Ndtv/N, fbN, k_HKY, dN, SEKa);
 317
 318                 if(dS<1e-9) {
 319                         status=-1;
 320                         omega=maxomega;
 321                 }
 322                 else {
 323                         omega= max(minomega, dN/dS);
 324                 }
 325
 326                 t = dS * 3 * S/(S + N) + dN * 3 * N/(S + N);
 327
 328                 if ( fabs(dS-dS0)<accu && fabs(dN-dN0)<accu && fabs(omega-w0)<accu )
 329                         break;
 330
 331                 dS0=dS;
 332                 dN0=dN;
 333                 w0=omega;
 334
 335         } //end of for(ir) */
 336
 337         if(ir==nround)
 338                 status=-2;
 339
 340         return status;
 341 }
 342
 343 //Count differences between two compared codons
 344 int YN00::CountDiffs(const string seq1, const string seq2, double &Sdts,double &Sdtv,double &Ndts, double &Ndtv,double PMatrix[]) {
 345         int h,i1,i2,i,k, transi, c[2],ct[2], by[3]={16,4,1};
 346         char aa[2];
 347         int dmark[3], step[3], b[2][3], bt1[3], bt2[3];
 348         int ndiff, npath, nstop, stspath[6],stvpath[6],ntspath[6],ntvpath[6];
 349         double sts,stv,nts,ntv; /* syn ts & tv, nonsyn ts & tv for 2 codons */
 350         double ppath[6], sump,p;
 351
 352         snp = 0;
 353         for (h=0,Sdts=Sdtv=Ndts=Ndtv=0; h<seq1.length(); h+=3)  {
 354
 355                 c[0]=getID(seq1.substr(h,3));
 356                 c[1]=getID(seq2.substr(h,3));
 357                 //Difference?
 358                 if (c[0]==c[1])
 359                         continue;
 360
 361                 for(i=0; i<2; i++) {
 362                         b[i][0]=c[i]/16;
 363                         b[i][1]=(c[i]%16)/4;
 364                         b[i][2]=c[i]%4;
 365                         aa[i]=getAminoAcid(c[i]);
 366                 }
 367
 368                 //ndiff: differences of two codons
 369                 ndiff=0;
 370                 sts=stv=nts=ntv=0;
 371                 //dmark[]: position of different codon
 372                 for(k=0; k<3; k++) {
 373                         dmark[k] = -1;
 374                         if (b[0][k]!=b[1][k])
 375                                 dmark[ndiff++]=k;
 376                 }
 377
 378                 snp+=ndiff;
 379
 380                 npath=1;
 381                 if(ndiff>1)
 382                         npath=(ndiff==2)?2:6;
 383
 384                 if (ndiff==1) {
 385                         transi=b[0][dmark[0]]+b[1][dmark[0]];
 386                         transi=(transi==1 || transi==5);
 387                         if (aa[0]==aa[1])  {
 388                                 if (transi)
 389                                         sts++;
 390                                 else
 391                                         stv++;
 392                         }
 393                         else {
 394                                 if (transi)
 395                                         nts++;
 396                                 else
 397                                         ntv++;
 398                         }
 399                 }
 400                 else {   /* ndiff=2 or 3 */
 401
 402                         nstop=0;
 403                         for(k=0; k<npath; k++) {
 404
 405                                 //set the step[]
 406                                 for(i1=0; i1<3; i1++)
 407                                         step[i1]=-1;
 408                                 if (ndiff==2) {
 409                                         step[0]=dmark[k];
 410                                         step[1]=dmark[1-k];
 411                                 }
 412                                 else {
 413                                         step[0]=k/2;
 414                                         step[1]=k%2;
 415                                         if (step[0]<=step[1])
 416                                                 step[1]++;
 417                                         step[2]=3-step[0]-step[1];
 418                                 }//end of set the step[]
 419
 420                                 for(i1=0; i1<3; i1++)
 421                                         bt1[i1]=bt2[i1]=b[0][i1];
 422
 423                                 stspath[k]=stvpath[k]=ntspath[k]=ntvpath[k]=0;
 424
 425                                 //ppath[]: probabilty of each path
 426                                 for (i1=0,ppath[k]=1; i1<ndiff; i1++) {
 427                                         bt2[step[i1]] = b[1][step[i1]];
 428
 429                                         //ct[]: mutated codon's ID(0--63)
 430                                         for (i2=0,ct[0]=ct[1]=0; i2<3; i2++) {
 431                                                 ct[0]+=bt1[i2]*by[i2];
 432                                                 ct[1]+=bt2[i2]*by[i2];
 433                                         }
 434                                         //ppath[k]: probabilty of path k
 435                                         ppath[k]*=PMatrix[ct[0]*CODON+ct[1]];
 436                                         for(i2=0; i2<2; i2++)
 437                                                 aa[i2]=getAminoAcid(ct[i2]);
 438
 439                                         if (aa[1]=='!') {
 440                                                 nstop++;
 441                                                 ppath[k]=0;
 442                                                 break;
 443                                         }
 444
 445                                         transi=b[0][step[i1]]+b[1][step[i1]];
 446                                         transi=(transi==1 || transi==5);  /* transition? */
 447
 448                                         //ts & tr when syn & nonsyn in path k
 449                                         if(aa[0]==aa[1]) {
 450                                                 if(transi)
 451                                                         stspath[k]++;
 452                                                 else
 453                                                         stvpath[k]++;
 454                                         }
 455                                         else {
 456                                                 if(transi)
 457                                                         ntspath[k]++;
 458                                                 else
 459                                                         ntvpath[k]++;
 460                                         }
 461
 462                                         for(i2=0; i2<3; i2++)
 463                                                 bt1[i2]=bt2[i2];
 464                                 }
 465
 466                         }  /* for(k,npath) */
 467                         if (npath==nstop) {  /* all paths through stop codons */
 468                                 if (ndiff==2) {
 469                                         nts=.5;
 470                                         ntv=1.5;
 471                                 }
 472                                 else {
 473                                         nts=.5;
 474                                         ntv=2.5;
 475                                 }
 476                         }
 477                         else {
 478                                 //sum probabilty of all path
 479                                 sump=sumArray(ppath,npath);
 480                                 if(sump>1e-20) {
 481                                         for(k=0;k<npath;k++) { //p: the probabilty of path k
 482                                                 p=ppath[k]/sump;
 483                                                 sts+=stspath[k]*p; stv+=stvpath[k]*p;
 484                                                 nts+=ntspath[k]*p; ntv+=ntvpath[k]*p;
 485                                         }
 486                                 }
 487                         }
 488
 489                 }//end of if(ndiff)
 490                 Sdts+=sts;
 491                 Sdtv+=stv;
 492                 Ndts+=nts;
 493                 Ndtv+=ntv;
 494
 495         }//end of for(h)
 496
 497    return (0);
 498 }
 499
 500 /* Calculate transition probability matrix(64*64) */
 501 int YN00::GetPMatCodon(double PMatrix[], double kappa, double omega)
 502 {
 503 /* Get PMat=exp(Q*t) for weighting pathways
 504         */
 505         int i,j,k, ndiff,pos=0,from[3],to[3];
 506         double mr;
 507         char c[2];
 508         double U[CODON*CODON], V[CODON*CODON], Root[CODON*CODON];
 509
 510         initArray(PMatrix, CODON*CODON);
 511         initArray(U, CODON*CODON);
 512         initArray(V, CODON*CODON);
 513         initArray(Root, CODON*CODON);
 514
 515         for(i=0; i<CODON; i++) {
 516                 for(j=0; j<i; j++) {
 517
 518                         //codon 'from'
 519                         from[0]=i/16; from[1]=(i/4)%4; from[2]=i%4;
 520                         //codon 'to'
 521                         to[0]=j/16;   to[1]=(j/4)%4;   to[2]=j%4;
 522                         //amino acid of 'from' and 'to'
 523                         c[0]=getAminoAcid(i);
 524                         c[1]=getAminoAcid(j);
 525                         //stop codon
 526                         if (c[0]=='!' || c[1]=='!')
 527                                 continue;
 528
 529                         //whether two codons only have one difference
 530                         for (k=0,ndiff=0; k<3; k++) {
 531                                 if (from[k]!=to[k]) {
 532                                         ndiff++;
 533                                         pos=k;
 534                                 }
 535                         }
 536                         if (ndiff==1) {
 537                                 //only have one difference
 538                                 PMatrix[i*CODON+j]=1;
 539                                 //transition
 540                                 if ((from[pos]+to[pos]-1)*(from[pos]+to[pos]-5)==0)
 541                                         PMatrix[i*CODON+j]*=kappa;
 542
 543                                 //nonsynonymous
 544                                 if(c[0]!=c[1])
 545                                         PMatrix[i*CODON+j]*=omega;
 546
 547                                 //diagonal element is equal
 548                                 PMatrix[j*CODON+i]=PMatrix[i*CODON+j];
 549                         }
 550                 }
 551         }
 552
 553         //PMatrix[](*Q): transition probability matrix
 554         for(i=0; i<CODON; i++)
 555                 for(j=0; j<CODON; j++)
 556                         PMatrix[i*CODON+j]*=pi[j];
 557
 558         //scale the sum of PMat[][j](j=0-63) to zero
 559         for (i=0,mr=0; i<CODON; i++) {
 560                 PMatrix[i*CODON+i]=-sumArray(PMatrix+i*CODON,CODON);
 561                 //The sum of transition probability of main diagnoal elements
 562                 mr-=pi[i]*PMatrix[i*CODON+i];
 563         }
 564
 565         //calculate exp(PMatrix*t)
 566         eigenQREV(PMatrix, pi, pi_sqrt, CODON, npi0, Root, U, V);
 567         for(i=0; i<CODON; i++)
 568                 Root[i]/=mr;
 569         PMatUVRoot(PMatrix, t, CODON, U, V, Root);
 570
 571         return 0;
 572 }
 573
 574
 575
 576 /* P(t) = U * exp{Root*t} * V */
 577 int YN00::PMatUVRoot (double P[], double t, int n, double U[], double V[], double Root[]) {
 578
 579         int i,j,k;
 580         double expt, uexpt, *pP;
 581         double smallp = 0;
 582
 583
 584         for (k=0,initArray(P,n*n); k<n; k++)
 585                 for (i=0,pP=P,expt=exp(t*Root[k]); i<n; i++)
 586                         for (j=0,uexpt=U[i*n+k]*expt; j<n; j++)
 587                                 *pP++ += uexpt*V[k*n+j];
 588
 589         for(i=0;i<n*n;i++)
 590                 if(P[i]<smallp)
 591                         P[i]=0;
 592
 593         return (0);
 594 }
 595
 596
 597 /* Count the synonymous and nonsynonymous sites of two sequences */
 598 int YN00::CountSites(const string seq, double &Stot, double &Ntot,double fbS[],double fbN[]) {
 599         int h,i,j,k, c[2],aa[2], b[3], by[3]={16,4,1};
 600         double r, S,N;
 601
 602         Stot=Ntot=0;
 603         initArray(fbS, 4);
 604         initArray(fbN, 4);
 605
 606         for (h=0; h<seq.length(); h+=3) {
 607
 608                 //Get codon id and amino acid
 609                 c[0]=getID(seq.substr(h,3));
 610                 aa[0]=getAminoAcid(c[0]);
 611                 for(i=0; i<3; i++) {
 612                         b[i]=convertChar(seq[h+i]);
 613                 }
 614
 615                 for (j=0,S=N=0; j<3; j++) {
 616                         for(k=0; k<4; k++) {    /* b[j] changes to k */
 617                                 if (k==b[j])
 618                                         continue;
 619                                 //c[0] change at position j
 620                                 c[1] = c[0]+(k-b[j])*by[j];
 621                                 aa[1] = getAminoAcid(c[1]);
 622
 623                                 if(aa[1]=='!')
 624                                         continue;
 625
 626                                 r=pi[c[1]];
 627                                 if (k+b[j]==1 || k+b[j]==5)     //transition
 628                                         r*=kappa;
 629
 630                                 if (aa[0]==aa[1]) { //synonymous
 631                                         S+=r;
 632                                         fbS[b[j]]+=r; //syn probability of A,C,G,T
 633                                 }
 634                                 else { //nonsynonymous
 635                                         N+=r;
 636                                         fbN[b[j]]+=r; //nonsyn probability of A,C,G,T
 637                                 }
 638                         }
 639                 }
 640                 Stot+=S;
 641                 Ntot+=N;
 642         }
 643
 644         //Scale Stot+Ntot to seq.length()
 645         r=seq.length()/(Stot+Ntot);
 646         Stot*=r;
 647         Ntot*=r;
 648
 649         //get probablity of syn of four nul.
 650         r=sumArray(fbS,4);
 651         for(k=0; k<4; k++)
 652                 fbS[k]/=r;
 653
 654         //get probablity of nonsyn of four nul.
 655         r=sumArray(fbN,4);
 656         for(k=0; k<4; k++)
 657                 fbN[k]/=r;
 658
 659         return 0;
 660 }
 661
 662
 663 string YN00::Run(string seq1, string seq2) {
 664
 665         t=0.4;
 666         kappa = NA;
 667         omega = 1;
 668         Ks = Ka = 0.1;
 669
 670         getFreqency(seq1, seq2);
 671         GetKappa(seq1, seq2);
 672         DistanceYN00(seq1, seq2, Ks, Ka, SEKs, SEKa);
 673
 674         t = (S*Ks+N*Ka)/(S+N);
 675
 676         return parseOutput();
 677 }
 678
 679
 680
 681 //The following functions are used to calculate PMatrix by Taylor.
 682
 683 int YN00::eigenQREV (double Q[], double pi[], double pi_sqrt[], int n, int npi0,
 684                double Root[], double U[], double V[])
 685 {
 686 /*
 687 This finds the eigen solution of the rate matrix Q for a time-reversible
 688 Markov process, using the algorithm for a real symmetric matrix.
 689 Rate matrix Q = S * diag{pi} = U * diag{Root} * V,
 690 where S is symmetrical, all elements of pi are positive, and U*V = I.
 691 pi_sqrt[n-npi0] has to be calculated before calling this routine.
 692
 693   [U 0] [Q_0 0] [U^-1 0]    [Root  0]
 694   [0 I] [0   0] [0    I]  = [0     0]
 695
 696         Ziheng Yang, 25 December 2001 (ref is CME/eigenQ.pdf)
 697         */
 698         int i,j, inew,jnew, nnew=n-npi0, status;
 699
 700         //npi0 is the number of stop codons in selected genetic table
 701
 702         if(npi0==0) {   //seldom occur
 703
 704                 //Set U[64*64]
 705                 for(i=0; i<n; i++) {
 706                         for(j=0,U[i*n+i] = Q[i*n+i]; j<i; j++)
 707                                 U[i*n+j] = U[j*n+i] = (Q[i*n+j] * pi_sqrt[i]/pi_sqrt[j]);
 708                 }
 709
 710                 //Set U[64*64]
 711                 status=eigenRealSym(U, n, Root, V);
 712
 713                 for(i=0;i<n;i++)
 714                         for(j=0;j<n;j++)
 715                                 V[i*n+j] = U[j*n+i] * pi_sqrt[j];
 716
 717                 for(i=0;i<n;i++)
 718                         for(j=0;j<n;j++)
 719                                 U[i*n+j] /= pi_sqrt[i];
 720         }
 721         else {
 722                 for(i=0,inew=0; i<n; i++) {
 723                         if(pi[i]) {
 724                                 for(j=0,jnew=0; j<i; j++)
 725                                         if(pi[j]) {
 726                                                 U[inew*nnew+jnew] = U[jnew*nnew+inew] = Q[i*n+j] * pi_sqrt[inew]/pi_sqrt[jnew];
 727                                                 jnew++;
 728                                         }
 729                                 U[inew*nnew+inew] = Q[i*n+i];
 730                                 inew++;
 731                         }
 732                 }
 733                 status=eigenRealSym(U, nnew, Root, V);
 734
 735                 for(i=n-1,inew=nnew-1; i>=0; i--)   /* construct Root */
 736                         Root[i] = (pi[i] ? Root[inew--] : 0);
 737
 738                 for(i=n-1,inew=nnew-1; i>=0; i--) {  /* construct V */
 739                         if(pi[i]) {
 740                                 for(j=n-1,jnew=nnew-1; j>=0; j--)
 741                                         if(pi[j]) {
 742                                                 V[i*n+j] = U[jnew*nnew+inew]*pi_sqrt[jnew];
 743                                                 jnew--;
 744                                         }
 745                                         else
 746                                                 V[i*n+j] = (i==j);
 747                                 inew--;
 748                         }
 749                         else
 750                                 for(j=0; j<n; j++)
 751                                         V[i*n+j] = (i==j);
 752                 }
 753
 754                 for(i=n-1,inew=nnew-1; i>=0; i--) {  /* construct U */
 755                         if(pi[i]) {
 756                                 for(j=n-1,jnew=nnew-1;j>=0;j--)
 757                                         if(pi[j]) {
 758                                                 U[i*n+j] = U[inew*nnew+jnew]/pi_sqrt[inew];
 759                                                 jnew--;
 760                                         }
 761                                         else
 762                                                 U[i*n+j] = (i==j);
 763                                         inew--;
 764                         }
 765                         else
 766                                 for(j=0;j<n;j++)
 767                                         U[i*n+j] = (i==j);
 768                 }
 769         }
 770
 771         return(status);
 772 }
 773
 774 void YN00::HouseholderRealSym(double a[], int n, double d[], double e[]) {
 775 /* This uses HouseholderRealSym transformation to reduce a real symmetrical matrix
 776 a[n*n] into a tridiagonal matrix represented by d and e.
 777 d[] is the diagonal (eigends), and e[] the off-diagonal.
 778         */
 779         int m,k,j,i;
 780         double scale,hh,h,g,f;
 781
 782         for (i=n-1;i>=1;i--) {
 783                 m=i-1;
 784                 h=scale=0;
 785                 if (m > 0) {
 786                         for (k=0;k<=m;k++)
 787                                 scale += fabs(a[i*n+k]);
 788                         if (scale == 0)
 789                                 e[i]=a[i*n+m];
 790                         else {
 791                                 for (k=0;k<=m;k++) {
 792                                         a[i*n+k] /= scale;
 793                                         h += a[i*n+k]*a[i*n+k];
 794                                 }
 795                                 f=a[i*n+m];
 796                                 g=(f >= 0 ? -sqrt(h) : sqrt(h));
 797                                 e[i]=scale*g;
 798                                 h -= f*g;
 799                                 a[i*n+m]=f-g;
 800                                 f=0;
 801                                 for (j=0;j<=m;j++) {
 802                                         a[j*n+i]=a[i*n+j]/h;
 803                                         g=0;
 804                                         for (k=0;k<=j;k++)
 805                                                 g += a[j*n+k]*a[i*n+k];
 806                                         for (k=j+1;k<=m;k++)
 807                                                 g += a[k*n+j]*a[i*n+k];
 808                                         e[j]=g/h;
 809                                         f += e[j]*a[i*n+j];
 810                                 }
 811                                 hh=f/(h*2);
 812                                 for (j=0;j<=m;j++) {
 813                                         f=a[i*n+j];
 814                                         e[j]=g=e[j]-hh*f;
 815                                         for (k=0;k<=j;k++)
 816                                                 a[j*n+k] -= (f*e[k]+g*a[i*n+k]);
 817                                 }
 818                         }
 819                 }
 820                 else
 821                         e[i]=a[i*n+m];
 822                 d[i]=h;
 823         }
 824         d[0]=e[0]=0;
 825
 826         /* Get eigenvectors */
 827         for (i=0;i<n;i++) {
 828                 m=i-1;
 829                 if (d[i]) {
 830                         for (j=0;j<=m;j++) {
 831                                 g=0;
 832                                 for (k=0;k<=m;k++)
 833                                         g += a[i*n+k]*a[k*n+j];
 834                                 for (k=0;k<=m;k++)
 835                                         a[k*n+j] -= g*a[k*n+i];
 836                         }
 837                 }
 838                 d[i]=a[i*n+i];
 839                 a[i*n+i]=1;
 840                 for (j=0;j<=m;j++) a[j*n+i]=a[i*n+j]=0;
 841         }
 842 }
 843
 844
 845
 846 int YN00::EigenTridagQLImplicit(double d[], double e[], int n, double z[])
 847 {
 848 /* This finds the eigen solution of a tridiagonal matrix represented by d and e.
 849 d[] is the diagonal (eigenvalues), e[] is the off-diagonal
 850 z[n*n]: as input should have the identity matrix to get the eigen solution of the
 851 tridiagonal matrix, or the output from HouseholderRealSym() to get the
 852 eigen solution to the original real symmetric matrix.
 853 z[n*n]: has the orthogonal matrix as output
 854
 855   Adapted from routine tqli in Numerical Recipes in C, with reference to
 856   LAPACK fortran code.
 857   Ziheng Yang, May 2001
 858         */
 859         int m,j,iter,niter=30, status=0, i,k;
 860         double s,r,p,g,f,dd,c,b, aa,bb;
 861
 862         for (i=1;i<n;i++) e[i-1]=e[i];  e[n-1]=0;
 863         for (j=0;j<n;j++) {
 864                 iter=0;
 865                 do {
 866                         for (m=j;m<n-1;m++) {
 867                                 dd=fabs(d[m])+fabs(d[m+1]);
 868                                 if (fabs(e[m])+dd == dd) break;  /* ??? */
 869                         }
 870                         if (m != j) {
 871                                 if (iter++ == niter) {
 872                                         status=-1;
 873                                         break;
 874                                 }
 875                                 g=(d[j+1]-d[j])/(2*e[j]);
 876
 877                                 /* r=pythag(g,1); */
 878
 879                                 if((aa=fabs(g))>1)  r=aa*sqrt(1+1/(g*g));
 880                                 else                r=sqrt(1+g*g);
 881
 882                                 g=d[m]-d[j]+e[j]/(g+SIGN(r,g));
 883                                 s=c=1;
 884                                 p=0;
 885                                 for (i=m-1;i>=j;i--) {
 886                                         f=s*e[i];
 887                                         b=c*e[i];
 888
 889                                         /*  r=pythag(f,g);  */
 890                                         aa=fabs(f); bb=fabs(g);
 891                                         if(aa>bb)       { bb/=aa;  r=aa*sqrt(1+bb*bb); }
 892                                         else if(bb==0)             r=0;
 893                                         else            { aa/=bb;  r=bb*sqrt(1+aa*aa); }
 894
 895                                         e[i+1]=r;
 896                                         if (r == 0) {
 897                                                 d[i+1] -= p;
 898                                                 e[m]=0;
 899                                                 break;
 900                                         }
 901                                         s=f/r;
 902                                         c=g/r;
 903                                         g=d[i+1]-p;
 904                                         r=(d[i]-g)*s+2*c*b;
 905                                         d[i+1]=g+(p=s*r);
 906                                         g=c*r-b;
 907                                         for (k=0;k<n;k++) {
 908                                                 f=z[k*n+i+1];
 909                                                 z[k*n+i+1]=s*z[k*n+i]+c*f;
 910                                                 z[k*n+i]=c*z[k*n+i]-s*f;
 911                                         }
 912                                 }
 913                                 if (r == 0 && i >= j) continue;
 914                                 d[j]-=p; e[j]=g; e[m]=0;
 915                         }
 916                 } while (m != j);
 917         }
 918         return(status);
 919 }
 920
 921 void YN00::EigenSort(double d[], double U[], int n)
 922 {
 923 /* this sorts the eigen values d[] and rearrange the (right) eigen vectors U[]
 924         */
 925         int k,j,i;
 926         double p;
 927
 928         for (i=0;i<n-1;i++) {
 929                 p=d[k=i];
 930                 for (j=i+1;j<n;j++)
 931                         if (d[j] >= p) p=d[k=j];
 932                         if (k != i) {
 933                                 d[k]=d[i];
 934                                 d[i]=p;
 935                                 for (j=0;j<n;j++) {
 936                                         p=U[j*n+i];
 937                                         U[j*n+i]=U[j*n+k];
 938                                         U[j*n+k]=p;
 939                                 }
 940                         }
 941         }
 942 }
 943
 944 int YN00::eigenRealSym(double A[], int n, double Root[], double work[])
 945 {
 946 /* This finds the eigen solution of a real symmetrical matrix A[n*n].  In return,
 947 A has the right vectors and Root has the eigenvalues. work[n] is the working space.
 948 The matrix is first reduced to a tridiagonal matrix using HouseholderRealSym(),
 949 and then using the QL algorithm with implicit shifts.
 950
 951   Adapted from routine tqli in Numerical Recipes in C, with reference to LAPACK
 952   Ziheng Yang, 23 May 2001
 953         */
 954         int status=0;
 955         HouseholderRealSym(A, n, Root, work);
 956         status=EigenTridagQLImplicit(Root, work, n, A);
 957         EigenSort(Root, A, n);
 958
 959         return(status);
 960 }
 961