src/tools/gmx_cluster.c

   1 /*
   2  *
   3  *                This source code is part of
   4  *
   5  *                 G   R   O   M   A   C   S
   6  *
   7  *          GROningen MAchine for Chemical Simulations
   8  *
   9  *                        VERSION 3.2.0
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2004, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * Green Red Orange Magenta Azure Cyan Skyblue
  34  */
  35 #ifdef HAVE_CONFIG_H
  36 #include <config.h>
  37 #endif
  38 #include <math.h>
  39 #include <string.h>
  40 #include <ctype.h>
  41 #include "macros.h"
  42 #include "smalloc.h"
  43 #include "typedefs.h"
  44 #include "copyrite.h"
  45 #include "statutil.h"
  46 #include "tpxio.h"
  47 #include "string2.h"
  48 #include "vec.h"
  49 #include "macros.h"
  50 #include "index.h"
  51 #include "random.h"
  52 #include "pbc.h"
  53 #include "xvgr.h"
  54 #include "futil.h"
  55 #include "matio.h"
  56 #include "eigensolver.h"
  57 #include "cmat.h"
  58 #include "do_fit.h"
  59 #include "trnio.h"
  60 #include "viewit.h"
  61 #include "gmx_ana.h"
  62
  63 /* macro's to print to two file pointers at once (i.e. stderr and log) */
  64 #define lo_ffprintf(fp1,fp2,buf) \
  65    fprintf(fp1,"%s",buf);\
  66    fprintf(fp2,"%s",buf);
  67 /* just print a prepared buffer to fp1 and fp2 */
  68 #define ffprintf(fp1,fp2,buf) { lo_ffprintf(fp1,fp2,buf) }
  69 /* prepare buffer with one argument, then print to fp1 and fp2 */
  70 #define ffprintf1(fp1,fp2,buf,fmt,arg) {\
  71    sprintf(buf,fmt,arg);\
  72    lo_ffprintf(fp1,fp2,buf)\
  73 }
  74 /* prepare buffer with two arguments, then print to fp1 and fp2 */
  75 #define ffprintf2(fp1,fp2,buf,fmt,arg1,arg2) {\
  76    sprintf(buf,fmt,arg1,arg2);\
  77    lo_ffprintf(fp1,fp2,buf)\
  78 }
  79
  80 typedef struct {
  81   int ncl;
  82   int *cl;
  83 } t_clusters;
  84
  85 typedef struct {
  86   int nr;
  87   int *nb;
  88 } t_nnb;
  89
  90 void pr_energy(FILE *fp,real e)
  91 {
  92   fprintf(fp,"Energy: %8.4f\n",e);
  93 }
  94
  95 void cp_index(int nn,int from[],int to[])
  96 {
  97   int i;
  98
  99   for(i=0; (i<nn); i++)
 100     to[i]=from[i];
 101 }
 102
 103 void mc_optimize(FILE *log,t_mat *m,int maxiter,int *seed,real kT)
 104 {
 105   real e[2],ei,ej,efac;
 106   int  *low_index;
 107   int  cur=0;
 108 #define next (1-cur)
 109   int  i,isw,jsw,iisw,jjsw,nn;
 110
 111   fprintf(stderr,"\nDoing Monte Carlo clustering\n");
 112   nn = m->nn;
 113   snew(low_index,nn);
 114   cp_index(nn,m->m_ind,low_index);
 115   if (getenv("TESTMC")) {
 116     e[cur] = mat_energy(m);
 117     pr_energy(log,e[cur]);
 118     fprintf(log,"Doing 1000 random swaps\n");
 119     for(i=0; (i<1000); i++) {
 120       do {
 121         isw = nn*rando(seed);
 122         jsw = nn*rando(seed);
 123       } while ((isw == jsw) || (isw >= nn) || (jsw >= nn));
 124       iisw = m->m_ind[isw];
 125       jjsw = m->m_ind[jsw];
 126       m->m_ind[isw] = jjsw;
 127       m->m_ind[jsw] = iisw;
 128     }
 129   }
 130   e[cur] = mat_energy(m);
 131   pr_energy(log,e[cur]);
 132   for(i=0; (i<maxiter); i++) {
 133     do {
 134       isw = nn*rando(seed);
 135       jsw = nn*rando(seed);
 136     } while ((isw == jsw) || (isw >= nn) || (jsw >= nn));
 137
 138     iisw = m->m_ind[isw];
 139     jjsw = m->m_ind[jsw];
 140     ei   = row_energy(nn,iisw,m->mat[jsw]);
 141     ej   = row_energy(nn,jjsw,m->mat[isw]);
 142
 143     e[next] = e[cur] + (ei+ej-EROW(m,isw)-EROW(m,jsw))/nn;
 144
 145     efac = kT ? exp((e[next]-e[cur])/kT) : -1;
 146     if ((e[next] > e[cur]) || (efac > rando(seed))) {
 147
 148       if (e[next] > e[cur])
 149         cp_index(nn,m->m_ind,low_index);
 150       else
 151         fprintf(log,"Taking uphill step\n");
 152
 153       /* Now swapping rows */
 154       m->m_ind[isw] = jjsw;
 155       m->m_ind[jsw] = iisw;
 156       EROW(m,isw)   = ei;
 157       EROW(m,jsw)   = ej;
 158       cur           = next;
 159       fprintf(log,"Iter: %d Swapped %4d and %4d (now %g)",
 160               i,isw,jsw,mat_energy(m));
 161       pr_energy(log,e[cur]);
 162     }
 163   }
 164   /* Now restore the highest energy index */
 165   cp_index(nn,low_index,m->m_ind);
 166 }
 167
 168 static void calc_dist(int nind,rvec x[],real **d)
 169 {
 170   int     i,j;
 171   real    *xi;
 172   rvec    dx;
 173
 174   for(i=0; (i<nind-1); i++) {
 175     xi=x[i];
 176     for(j=i+1; (j<nind); j++) {
 177       /* Should use pbc_dx when analysing multiple molecueles,
 178        * but the box is not stored for every frame.
 179        */
 180       rvec_sub(xi,x[j],dx);
 181       d[i][j]=norm(dx);
 182     }
 183   }
 184 }
 185
 186 static real rms_dist(int isize,real **d,real **d_r)
 187 {
 188   int  i,j;
 189   real r,r2;
 190
 191   r2=0.0;
 192   for(i=0; (i<isize-1); i++)
 193     for(j=i+1; (j<isize); j++) {
 194       r=d[i][j]-d_r[i][j];
 195       r2+=r*r;
 196     }
 197   r2/=(isize*(isize-1))/2;
 198
 199   return sqrt(r2);
 200 }
 201
 202 static int rms_dist_comp(const void *a,const void *b)
 203 {
 204   t_dist *da,*db;
 205
 206   da = (t_dist *)a;
 207   db = (t_dist *)b;
 208
 209   if (da->dist - db->dist < 0)
 210     return -1;
 211   else if (da->dist - db->dist > 0)
 212     return 1;
 213   return 0;
 214 }
 215
 216 static int clust_id_comp(const void *a,const void *b)
 217 {
 218   t_clustid *da,*db;
 219
 220   da = (t_clustid *)a;
 221   db = (t_clustid *)b;
 222
 223   return da->clust - db->clust;
 224 }
 225
 226 static int nrnb_comp(const void *a, const void *b)
 227 {
 228   t_nnb *da, *db;
 229
 230   da = (t_nnb *)a;
 231   db = (t_nnb *)b;
 232
 233   /* return the b-a, we want highest first */
 234   return db->nr - da->nr;
 235 }
 236
 237 void gather(t_mat *m,real cutoff,t_clusters *clust)
 238 {
 239   t_clustid *c;
 240   t_dist    *d;
 241   int       i,j,k,nn,cid,n1,diff;
 242   bool      bChange;
 243
 244   /* First we sort the entries in the RMSD matrix */
 245   n1 = m->nn;
 246   nn = ((n1-1)*n1)/2;
 247   snew(d,nn);
 248   for(i=k=0; (i<n1); i++)
 249     for(j=i+1; (j<n1); j++,k++) {
 250       d[k].i    = i;
 251       d[k].j    = j;
 252       d[k].dist = m->mat[i][j];
 253     }
 254   if (k != nn)
 255     gmx_incons("gather algortihm");
 256   qsort(d,nn,sizeof(d[0]),rms_dist_comp);
 257
 258   /* Now we make a cluster index for all of the conformations */
 259   c = new_clustid(n1);
 260
 261   /* Now we check the closest structures, and equalize their cluster numbers */
 262   fprintf(stderr,"Linking structures ");
 263   do {
 264     fprintf(stderr,"*");
 265     bChange=FALSE;
 266     for(k=0; (k<nn) && (d[k].dist < cutoff); k++) {
 267       diff = c[d[k].j].clust - c[d[k].i].clust;
 268       if (diff) {
 269         bChange = TRUE;
 270         if (diff > 0)
 271           c[d[k].j].clust = c[d[k].i].clust;
 272         else
 273           c[d[k].i].clust = c[d[k].j].clust;
 274       }
 275     }
 276   } while (bChange);
 277   fprintf(stderr,"\nSorting and renumbering clusters\n");
 278   /* Sort on cluster number */
 279   qsort(c,n1,sizeof(c[0]),clust_id_comp);
 280
 281   /* Renumber clusters */
 282   cid = 1;
 283   for(k=1; k<n1; k++) {
 284     if (c[k].clust != c[k-1].clust) {
 285       c[k-1].clust = cid;
 286       cid ++;
 287     } else
 288       c[k-1].clust = cid;
 289   }
 290   c[k-1].clust = cid;
 291   if (debug)
 292     for(k=0; (k<n1); k++)
 293       fprintf(debug,"Cluster index for conformation %d: %d\n",
 294               c[k].conf,c[k].clust);
 295   clust->ncl = cid;
 296   for(k=0; k<n1; k++)
 297     clust->cl[c[k].conf] = c[k].clust;
 298
 299   sfree(c);
 300   sfree(d);
 301 }
 302
 303 bool jp_same(int **nnb,int i,int j,int P)
 304 {
 305   bool bIn;
 306   int  k,ii,jj,pp;
 307
 308   bIn = FALSE;
 309   for(k=0; nnb[i][k]>=0; k++)
 310     bIn = bIn || (nnb[i][k] == j);
 311   if (!bIn)
 312     return FALSE;
 313
 314   bIn = FALSE;
 315   for(k=0; nnb[j][k]>=0; k++)
 316     bIn = bIn || (nnb[j][k] == i);
 317   if (!bIn)
 318     return FALSE;
 319
 320   pp=0;
 321   for(ii=0; nnb[i][ii]>=0; ii++)
 322     for(jj=0; nnb[j][jj]>=0; jj++)
 323       if ((nnb[i][ii] == nnb[j][jj]) && (nnb[i][ii] != -1))
 324         pp++;
 325
 326   return (pp >= P);
 327 }
 328
 329 static void jarvis_patrick(int n1,real **mat,int M,int P,
 330                            real rmsdcut,t_clusters *clust)
 331 {
 332   t_dist    *row;
 333   t_clustid *c;
 334   int       **nnb;
 335   int       i,j,k,cid,diff,max;
 336   bool      bChange;
 337   real      **mcpy=NULL;
 338
 339   if (rmsdcut < 0)
 340     rmsdcut = 10000;
 341
 342   /* First we sort the entries in the RMSD matrix row by row.
 343    * This gives us the nearest neighbor list.
 344    */
 345   snew(nnb,n1);
 346   snew(row,n1);
 347   for(i=0; (i<n1); i++) {
 348     for(j=0; (j<n1); j++) {
 349       row[j].j    = j;
 350       row[j].dist = mat[i][j];
 351     }
 352     qsort(row,n1,sizeof(row[0]),rms_dist_comp);
 353     if (M>0) {
 354       /* Put the M nearest neighbors in the list */
 355       snew(nnb[i],M+1);
 356       for(j=k=0; (k<M) && (j<n1) && (mat[i][row[j].j] < rmsdcut); j++)
 357         if (row[j].j  != i) {
 358           nnb[i][k]  = row[j].j;
 359           k++;
 360         }
 361       nnb[i][k] = -1;
 362     } else {
 363       /* Put all neighbors nearer than rmsdcut in the list */
 364       max=0;
 365       k=0;
 366       for(j=0; (j<n1) && (mat[i][row[j].j] < rmsdcut); j++)
 367         if (row[j].j != i) {
 368           if (k >= max) {
 369             max += 10;
 370             srenew(nnb[i],max);
 371           }
 372           nnb[i][k] = row[j].j;
 373           k++;
 374         }
 375       if (k == max)
 376         srenew(nnb[i],max+1);
 377       nnb[i][k] = -1;
 378     }
 379   }
 380   sfree(row);
 381   if (debug) {
 382     fprintf(debug,"Nearest neighborlist. M = %d, P = %d\n",M,P);
 383     for(i=0; (i<n1); i++) {
 384       fprintf(debug,"i:%5d nbs:",i);
 385       for(j=0; nnb[i][j]>=0; j++)
 386         fprintf(debug,"%5d[%5.3f]",nnb[i][j],mat[i][nnb[i][j]]);
 387       fprintf(debug,"\n");
 388     }
 389   }
 390
 391   c = new_clustid(n1);
 392   fprintf(stderr,"Linking structures ");
 393   /* Use mcpy for temporary storage of booleans */
 394   mcpy = mk_matrix(n1,n1,FALSE);
 395   for(i=0; i<n1; i++)
 396     for(j=i+1; j<n1; j++)
 397       mcpy[i][j] = jp_same(nnb,i,j,P);
 398   do {
 399     fprintf(stderr,"*");
 400     bChange=FALSE;
 401     for(i=0; i<n1; i++) {
 402       for(j=i+1; j<n1; j++)
 403         if (mcpy[i][j]) {
 404           diff = c[j].clust - c[i].clust;
 405           if (diff) {
 406             bChange = TRUE;
 407             if (diff > 0)
 408               c[j].clust = c[i].clust;
 409             else
 410               c[i].clust = c[j].clust;
 411           }
 412         }
 413     }
 414   } while (bChange);
 415
 416   fprintf(stderr,"\nSorting and renumbering clusters\n");
 417   /* Sort on cluster number */
 418   qsort(c,n1,sizeof(c[0]),clust_id_comp);
 419
 420   /* Renumber clusters */
 421   cid = 1;
 422   for(k=1; k<n1; k++) {
 423     if (c[k].clust != c[k-1].clust) {
 424       c[k-1].clust = cid;
 425       cid ++;
 426     } else
 427       c[k-1].clust = cid;
 428   }
 429   c[k-1].clust = cid;
 430   clust->ncl = cid;
 431   for(k=0; k<n1; k++)
 432     clust->cl[c[k].conf] = c[k].clust;
 433   if (debug)
 434     for(k=0; (k<n1); k++)
 435       fprintf(debug,"Cluster index for conformation %d: %d\n",
 436               c[k].conf,c[k].clust);
 437
 438 /* Again, I don't see the point in this... (AF) */
 439 /*   for(i=0; (i<n1); i++) { */
 440 /*     for(j=0; (j<n1); j++) */
 441 /*       mcpy[c[i].conf][c[j].conf] = mat[i][j]; */
 442 /*   } */
 443 /*   for(i=0; (i<n1); i++) { */
 444 /*     for(j=0; (j<n1); j++) */
 445 /*       mat[i][j] = mcpy[i][j]; */
 446 /*   } */
 447   done_matrix(n1,&mcpy);
 448
 449   sfree(c);
 450   for(i=0; (i<n1); i++)
 451     sfree(nnb[i]);
 452   sfree(nnb);
 453 }
 454
 455 static void dump_nnb (FILE *fp, const char *title, int n1, t_nnb *nnb)
 456 {
 457   int i,j;
 458
 459   /* dump neighbor list */
 460   fprintf(fp,"%s",title);
 461   for(i=0; (i<n1); i++) {
 462     fprintf(fp,"i:%5d #:%5d nbs:",i,nnb[i].nr);
 463     for(j=0; j<nnb[i].nr; j++)
 464       fprintf(fp,"%5d",nnb[i].nb[j]);
 465     fprintf(fp,"\n");
 466   }
 467 }
 468
 469 static void gromos(int n1, real **mat, real rmsdcut, t_clusters *clust)
 470 {
 471   t_dist *row;
 472   t_nnb  *nnb;
 473   int    i,j,k,j1,max;
 474
 475   /* Put all neighbors nearer than rmsdcut in the list */
 476   fprintf(stderr,"Making list of neighbors within cutoff ");
 477   snew(nnb,n1);
 478   snew(row,n1);
 479   for(i=0; (i<n1); i++) {
 480     max=0;
 481     k=0;
 482     /* put all neighbors within cut-off in list */
 483     for(j=0; j<n1; j++)
 484       if (mat[i][j] < rmsdcut) {
 485         if (k >= max) {
 486           max += 10;
 487           srenew(nnb[i].nb,max);
 488         }
 489         nnb[i].nb[k] = j;
 490         k++;
 491       }
 492     /* store nr of neighbors, we'll need that */
 493     nnb[i].nr = k;
 494     if (i%(1+n1/100)==0) fprintf(stderr,"%3d%%\b\b\b\b",(i*100+1)/n1);
 495   }
 496   fprintf(stderr,"%3d%%\n",100);
 497   sfree(row);
 498
 499   /* sort neighbor list on number of neighbors, largest first */
 500   qsort(nnb,n1,sizeof(nnb[0]),nrnb_comp);
 501
 502   if (debug) dump_nnb(debug, "Nearest neighborlist after sort.\n", n1, nnb);
 503
 504   /* turn first structure with all its neighbors (largest) into cluster
 505      remove them from pool of structures and repeat for all remaining */
 506   fprintf(stderr,"Finding clusters %4d", 0);
 507   /* cluster id's start at 1: */
 508   k=1;
 509   while(nnb[0].nr) {
 510     /* set cluster id (k) for first item in neighborlist */
 511     for (j=0; j<nnb[0].nr; j++)
 512       clust->cl[nnb[0].nb[j]] = k;
 513     /* mark as done */
 514     nnb[0].nr=0;
 515     sfree(nnb[0].nb);
 516
 517     /* adjust number of neighbors for others, taking removals into account: */
 518     for(i=1; i<n1 && nnb[i].nr; i++) {
 519       j1=0;
 520       for(j=0; j<nnb[i].nr; j++)
 521         /* if this neighbor wasn't removed */
 522         if ( clust->cl[nnb[i].nb[j]] == 0 ) {
 523           /* shift the rest (j1<=j) */
 524           nnb[i].nb[j1]=nnb[i].nb[j];
 525           /* next */
 526           j1++;
 527         }
 528       /* now j1 is the new number of neighbors */
 529       nnb[i].nr=j1;
 530     }
 531     /* sort again on nnb[].nr, because we have new # neighbors: */
 532     /* but we only need to sort upto i, i.e. when nnb[].nr>0 */
 533     qsort(nnb,i,sizeof(nnb[0]),nrnb_comp);
 534
 535     fprintf(stderr,"\b\b\b\b%4d",k);
 536     /* new cluster id */
 537     k++;
 538   }
 539   fprintf(stderr,"\n");
 540   sfree(nnb);
 541   if (debug) {
 542     fprintf(debug,"Clusters (%d):\n", k);
 543     for(i=0; i<n1; i++)
 544       fprintf(debug," %3d", clust->cl[i]);
 545     fprintf(debug,"\n");
 546   }
 547
 548   clust->ncl=k-1;
 549 }
 550
 551 rvec **read_whole_trj(const char *fn,int isize,atom_id index[],int skip,
 552                       int *nframe, real **time,const output_env_t oenv)
 553 {
 554   rvec   **xx,*x;
 555   matrix box;
 556   real   t;
 557   int    i,i0,j,max_nf;
 558   int    status,natom;
 559
 560   max_nf = 0;
 561   xx     = NULL;
 562   *time  = NULL;
 563   natom = read_first_x(oenv,&status,fn,&t,&x,box);
 564   i  = 0;
 565   i0 = 0;
 566   do {
 567     if (i0 >= max_nf) {
 568       max_nf += 10;
 569       srenew(xx,max_nf);
 570       srenew(*time,max_nf);
 571     }
 572     if ((i % skip) == 0) {
 573       snew(xx[i0],isize);
 574       /* Store only the interesting atoms */
 575       for(j=0; (j<isize); j++)
 576         copy_rvec(x[index[j]],xx[i0][j]);
 577       (*time)[i0] = t;
 578       i0 ++;
 579     }
 580     i++;
 581   } while (read_next_x(oenv,status,&t,natom,x,box));
 582   fprintf(stderr,"Allocated %lu bytes for frames\n",
 583           (unsigned long) (max_nf*isize*sizeof(**xx)));
 584   fprintf(stderr,"Read %d frames from trajectory %s\n",i0,fn);
 585   *nframe = i0;
 586   sfree(x);
 587
 588   return xx;
 589 }
 590
 591 static int plot_clusters(int nf, real **mat, t_clusters *clust,
 592                          int nlevels, int minstruct)
 593 {
 594   int i,j,ncluster,ci;
 595   int *cl_id,*nstruct,*strind;
 596
 597   snew(cl_id,nf);
 598   snew(nstruct,nf);
 599   snew(strind,nf);
 600   for(i=0; i<nf; i++) {
 601     strind[i] = 0;
 602     cl_id[i]  = clust->cl[i];
 603     nstruct[cl_id[i]]++;
 604   }
 605   ncluster = 0;
 606   for(i=0; i<nf; i++) {
 607     if (nstruct[i] >= minstruct) {
 608       ncluster++;
 609       for(j=0; (j<nf); j++)
 610         if (cl_id[j] == i)
 611           strind[j] = ncluster;
 612     }
 613   }
 614   ncluster++;
 615   fprintf(stderr,"There are %d clusters with at least %d conformations\n",
 616             ncluster,minstruct);
 617
 618   for(i=0; (i<nf); i++) {
 619     ci = cl_id[i];
 620     for(j=0; j<i; j++)
 621       if ((ci == cl_id[j]) && (nstruct[ci] >= minstruct)) {
 622         /* color different clusters with different colors, as long as
 623            we don't run out of colors */
 624         mat[i][j] = strind[i];
 625       }
 626       else
 627         mat[i][j] = 0;
 628   }
 629   sfree(strind);
 630   sfree(nstruct);
 631   sfree(cl_id);
 632
 633   return ncluster;
 634 }
 635
 636 static void mark_clusters(int nf, real **mat, real val, t_clusters *clust)
 637 {
 638   int i,j,v;
 639
 640   for(i=0; i<nf; i++)
 641     for(j=0; j<i; j++)
 642       if (clust->cl[i] == clust->cl[j])
 643         mat[i][j] = val;
 644       else
 645         mat[i][j] = 0;
 646 }
 647
 648 static char *parse_filename(const char *fn, int maxnr)
 649 {
 650   int i;
 651   char *fnout;
 652   const char *ext;
 653   char buf[STRLEN];
 654
 655   if (strchr(fn,'%'))
 656     gmx_fatal(FARGS,"will not number filename %s containing '%c'",fn,'%');
 657   /* number of digits needed in numbering */
 658   i = (int)(log(maxnr)/log(10)) + 1;
 659   /* split fn and ext */
 660   ext = strrchr(fn, '.');
 661   if (!ext)
 662     gmx_fatal(FARGS,"cannot separate extension in filename %s",fn);
 663   ext++;
 664   /* insert e.g. '%03d' between fn and ext */
 665   sprintf(buf,"%s%%0%dd.%s",fn,i,ext);
 666   snew(fnout,strlen(buf)+1);
 667   strcpy(fnout, buf);
 668
 669   return fnout;
 670 }
 671
 672 static void ana_trans(t_clusters *clust, int nf,
 673                       const char *transfn, const char *ntransfn, FILE *log,
 674                       t_rgb rlo,t_rgb rhi,const output_env_t oenv)
 675 {
 676   FILE *fp;
 677   real **trans,*axis;
 678   int  *ntrans;
 679   int  i,ntranst,maxtrans;
 680   char buf[STRLEN];
 681
 682   snew(ntrans,clust->ncl);
 683   snew(trans,clust->ncl);
 684   snew(axis,clust->ncl);
 685   for(i=0; i<clust->ncl; i++) {
 686     axis[i]=i+1;
 687     snew(trans[i],clust->ncl);
 688   }
 689   ntranst=0;
 690   maxtrans=0;
 691   for(i=1; i<nf; i++)
 692     if(clust->cl[i] != clust->cl[i-1]) {
 693       ntranst++;
 694       ntrans[clust->cl[i-1]-1]++;
 695       ntrans[clust->cl[i]-1]++;
 696       trans[clust->cl[i-1]-1][clust->cl[i]-1]++;
 697       maxtrans = max(maxtrans, trans[clust->cl[i]-1][clust->cl[i-1]-1]);
 698     }
 699   ffprintf2(stderr,log,buf,"Counted %d transitions in total, "
 700             "max %d between two specific clusters\n",ntranst,maxtrans);
 701   if (transfn) {
 702     fp=ffopen(transfn,"w");
 703     i = min(maxtrans+1, 80);
 704     write_xpm(fp,0,"Cluster Transitions","# transitions",
 705               "from cluster","to cluster",
 706               clust->ncl, clust->ncl, axis, axis, trans,
 707               0, maxtrans, rlo, rhi, &i);
 708     ffclose(fp);
 709   }
 710   if (ntransfn) {
 711     fp=xvgropen(ntransfn,"Cluster Transitions","Cluster #","# transitions",
 712                 oenv);
 713     for(i=0; i<clust->ncl; i++)
 714       fprintf(fp,"%5d %5d\n",i+1,ntrans[i]);
 715     ffclose(fp);
 716   }
 717   sfree(ntrans);
 718   for(i=0; i<clust->ncl; i++)
 719     sfree(trans[i]);
 720   sfree(trans);
 721   sfree(axis);
 722 }
 723
 724 static void analyze_clusters(int nf, t_clusters *clust, real **rmsd,
 725                              int natom, t_atoms *atoms, rvec *xtps,
 726                              real *mass, rvec **xx, real *time,
 727                              int ifsize, atom_id *fitidx,
 728                              int iosize, atom_id *outidx,
 729                              const char *trxfn, const char *sizefn,
 730                              const char *transfn, const char *ntransfn,
 731                              const char *clustidfn, bool bAverage,
 732                              int write_ncl, int write_nst, real rmsmin,
 733                              bool bFit, FILE *log,t_rgb rlo,t_rgb rhi,
 734                              const output_env_t oenv)
 735 {
 736   FILE *fp=NULL;
 737   char buf[STRLEN],buf1[40],buf2[40],buf3[40],*trxsfn;
 738   int  trxout=0,trxsout=0;
 739   int  i,i1,cl,nstr,*structure,first=0,midstr;
 740   bool *bWrite=NULL;
 741   real r,clrmsd,midrmsd;
 742   rvec *xav=NULL;
 743   matrix zerobox;
 744
 745   clear_mat(zerobox);
 746
 747   ffprintf1(stderr,log,buf,"\nFound %d clusters\n\n",clust->ncl);
 748   trxsfn=NULL;
 749   if (trxfn) {
 750     /* do we write all structures? */
 751     if (write_ncl) {
 752       trxsfn = parse_filename(trxfn, max(write_ncl,clust->ncl));
 753       snew(bWrite,nf);
 754     }
 755     ffprintf2(stderr,log,buf,"Writing %s structure for each cluster to %s\n",
 756               bAverage ? "average" : "middle", trxfn);
 757     if (write_ncl) {
 758       /* find out what we want to tell the user:
 759          Writing [all structures|structures with rmsd > %g] for
 760          {all|first %d} clusters {with more than %d structures} to %s     */
 761       if (rmsmin>0.0)
 762         sprintf(buf1,"structures with rmsd > %g",rmsmin);
 763       else
 764         sprintf(buf1,"all structures");
 765       buf2[0]=buf3[0]='\0';
 766       if (write_ncl>=clust->ncl) {
 767         if (write_nst==0)
 768           sprintf(buf2,"all ");
 769       } else
 770         sprintf(buf2,"the first %d ",write_ncl);
 771       if (write_nst)
 772         sprintf(buf3," with more than %d structures",write_nst);
 773       sprintf(buf,"Writing %s for %sclusters%s to %s\n",buf1,buf2,buf3,trxsfn);
 774       ffprintf(stderr,log,buf);
 775     }
 776
 777     /* Prepare a reference structure for the orientation of the clusters  */
 778     if (bFit)
 779     reset_x(ifsize,fitidx,natom,NULL,xtps,mass);
 780     trxout = open_trx(trxfn,"w");
 781     /* Calculate the average structure in each cluster,               *
 782      * all structures are fitted to the first struture of the cluster */
 783     snew(xav,natom);
 784   }
 785
 786   if (transfn || ntransfn)
 787     ana_trans(clust, nf, transfn, ntransfn, log,rlo,rhi,oenv);
 788
 789   if (clustidfn) {
 790     fp=xvgropen(clustidfn,"Clusters",output_env_get_xvgr_tlabel(oenv),"Cluster #",oenv);
 791     fprintf(fp,"@    s0 symbol 2\n");
 792     fprintf(fp,"@    s0 symbol size 0.2\n");
 793     fprintf(fp,"@    s0 linestyle 0\n");
 794     for(i=0; i<nf; i++)
 795       fprintf(fp,"%8g %8d\n",time[i],clust->cl[i]);
 796     ffclose(fp);
 797   }
 798   if (sizefn) {
 799     fp=xvgropen(sizefn,"Cluster Sizes","Cluster #","# Structures",oenv);
 800     fprintf(fp,"@g%d type %s\n",0,"bar");
 801   }
 802   snew(structure,nf);
 803   fprintf(log,"\n%3s | %3s  %4s | %6s %4s | cluster members\n",
 804           "cl.","#st","rmsd","middle","rmsd");
 805   for(cl=1; cl<=clust->ncl; cl++) {
 806     /* prepare structures (fit, middle, average) */
 807     if (xav)
 808       for(i=0; i<natom;i++)
 809         clear_rvec(xav[i]);
 810     nstr=0;
 811     for(i1=0; i1<nf; i1++)
 812       if (clust->cl[i1] == cl) {
 813         structure[nstr] = i1;
 814         nstr++;
 815         if (trxfn && (bAverage || write_ncl) ) {
 816           if (bFit)
 817           reset_x(ifsize,fitidx,natom,NULL,xx[i1],mass);
 818           if (nstr == 1)
 819             first = i1;
 820           else if (bFit)
 821             do_fit(natom,mass,xx[first],xx[i1]);
 822           if (xav)
 823             for(i=0; i<natom; i++)
 824               rvec_inc(xav[i],xx[i1][i]);
 825         }
 826       }
 827     if (sizefn)
 828       fprintf(fp,"%8d %8d\n",cl,nstr);
 829     clrmsd = 0;
 830     midstr = 0;
 831     midrmsd = 10000;
 832     for(i1=0; i1<nstr; i1++) {
 833       r = 0;
 834       if (nstr > 1) {
 835         for(i=0; i<nstr; i++)
 836           if (i < i1)
 837             r += rmsd[structure[i]][structure[i1]];
 838           else
 839             r += rmsd[structure[i1]][structure[i]];
 840         r /= (nstr - 1);
 841       }
 842       if ( r < midrmsd ) {
 843         midstr = structure[i1];
 844         midrmsd = r;
 845       }
 846       clrmsd += r;
 847     }
 848     clrmsd /= nstr;
 849
 850     /* dump cluster info to logfile */
 851     if (nstr > 1) {
 852       sprintf(buf1,"%6.3f",clrmsd);
 853       if (buf1[0] == '0')
 854         buf1[0] = ' ';
 855       sprintf(buf2,"%5.3f",midrmsd);
 856       if (buf2[0] == '0')
 857         buf2[0] = ' ';
 858     } else {
 859       sprintf(buf1,"%5s","");
 860       sprintf(buf2,"%5s","");
 861     }
 862     fprintf(log,"%3d | %3d %s | %6g%s |",cl,nstr,buf1,time[midstr],buf2);
 863     for(i=0; i<nstr; i++) {
 864       if ((i % 7 == 0) && i)
 865         sprintf(buf,"\n%3s | %3s  %4s | %6s %4s |","","","","","");
 866       else
 867         buf[0] = '\0';
 868       i1 = structure[i];
 869       fprintf(log,"%s %6g",buf,time[i1]);
 870     }
 871     fprintf(log,"\n");
 872
 873     /* write structures to trajectory file(s) */
 874     if (trxfn) {
 875       if (write_ncl)
 876         for(i=0; i<nstr; i++)
 877           bWrite[i]=FALSE;
 878       if ( cl < write_ncl+1 && nstr > write_nst ) {
 879         /* Dump all structures for this cluster */
 880         /* generate numbered filename (there is a %d in trxfn!) */
 881         sprintf(buf,trxsfn,cl);
 882         trxsout = open_trx(buf,"w");
 883         for(i=0; i<nstr; i++) {
 884           bWrite[i] = TRUE;
 885           if (rmsmin>0.0)
 886             for(i1=0; i1<i && bWrite[i]; i1++)
 887               if (bWrite[i1])
 888                 bWrite[i] = rmsd[structure[i1]][structure[i]] > rmsmin;
 889           if (bWrite[i])
 890             write_trx(trxsout,iosize,outidx,atoms,i,time[structure[i]],zerobox,
 891                       xx[structure[i]],NULL,NULL);
 892         }
 893         close_trx(trxsout);
 894       }
 895       /* Dump the average structure for this cluster */
 896       if (bAverage) {
 897         for(i=0; i<natom; i++)
 898           svmul(1.0/nstr,xav[i],xav[i]);
 899       } else {
 900         for(i=0; i<natom; i++)
 901           copy_rvec(xx[midstr][i],xav[i]);
 902         if (bFit)
 903         reset_x(ifsize,fitidx,natom,NULL,xav,mass);
 904       }
 905       if (bFit)
 906         do_fit(natom,mass,xtps,xav);
 907       r = cl;
 908       write_trx(trxout,iosize,outidx,atoms,cl,time[midstr],zerobox,xav,NULL,NULL);
 909     }
 910   }
 911   /* clean up */
 912   if (trxfn) {
 913     close_trx(trxout);
 914     sfree(xav);
 915     if (write_ncl)
 916       sfree(bWrite);
 917   }
 918   sfree(structure);
 919   if (trxsfn)
 920     sfree(trxsfn);
 921 }
 922
 923 static void convert_mat(t_matrix *mat,t_mat *rms)
 924 {
 925   int i,j;
 926
 927   rms->n1 = mat->nx;
 928   matrix2real(mat,rms->mat);
 929   /* free input xpm matrix data */
 930   for(i=0; i<mat->nx; i++)
 931     sfree(mat->matrix[i]);
 932   sfree(mat->matrix);
 933
 934   for(i=0; i<mat->nx; i++)
 935     for(j=i; j<mat->nx; j++) {
 936       rms->sumrms += rms->mat[i][j];
 937       rms->maxrms = max(rms->maxrms, rms->mat[i][j]);
 938       if (j!=i)
 939         rms->minrms = min(rms->minrms, rms->mat[i][j]);
 940     }
 941   rms->nn = mat->nx;
 942 }
 943
 944 int gmx_cluster(int argc,char *argv[])
 945 {
 946   const char *desc[] = {
 947     "g_cluster can cluster structures with several different methods.",
 948     "Distances between structures can be determined from a trajectory",
 949     "or read from an XPM matrix file with the [TT]-dm[tt] option.",
 950     "RMS deviation after fitting or RMS deviation of atom-pair distances",
 951     "can be used to define the distance between structures.[PAR]",
 952
 953     "single linkage: add a structure to a cluster when its distance to any",
 954     "element of the cluster is less than [TT]cutoff[tt].[PAR]",
 955
 956     "Jarvis Patrick: add a structure to a cluster when this structure",
 957     "and a structure in the cluster have each other as neighbors and",
 958     "they have a least [TT]P[tt] neighbors in common. The neighbors",
 959     "of a structure are the M closest structures or all structures within",
 960     "[TT]cutoff[tt].[PAR]",
 961
 962     "Monte Carlo: reorder the RMSD matrix using Monte Carlo.[PAR]",
 963
 964     "diagonalization: diagonalize the RMSD matrix.[PAR]"
 965
 966     "gromos: use algorithm as described in Daura [IT]et al.[it]",
 967     "([IT]Angew. Chem. Int. Ed.[it] [BB]1999[bb], [IT]38[it], pp 236-240).",
 968     "Count number of neighbors using cut-off, take structure with",
 969     "largest number of neighbors with all its neighbors as cluster",
 970     "and eleminate it from the pool of clusters. Repeat for remaining",
 971     "structures in pool.[PAR]",
 972
 973     "When the clustering algorithm assigns each structure to exactly one",
 974     "cluster (single linkage, Jarvis Patrick and gromos) and a trajectory",
 975     "file is supplied, the structure with",
 976     "the smallest average distance to the others or the average structure",
 977     "or all structures for each cluster will be written to a trajectory",
 978     "file. When writing all structures, separate numbered files are made",
 979     "for each cluster.[PAR]"
 980
 981     "Two output files are always written:[BR]",
 982     "[TT]-o[tt] writes the RMSD values in the upper left half of the matrix",
 983     "and a graphical depiction of the clusters in the lower right half",
 984     "When [TT]-minstruct[tt] = 1 the graphical depiction is black",
 985     "when two structures are in the same cluster.",
 986     "When [TT]-minstruct[tt] > 1 different colors will be used for each",
 987     "cluster.[BR]",
 988     "[TT]-g[tt] writes information on the options used and a detailed list",
 989     "of all clusters and their members.[PAR]",
 990
 991     "Additionally, a number of optional output files can be written:[BR]",
 992     "[TT]-dist[tt] writes the RMSD distribution.[BR]",
 993     "[TT]-ev[tt] writes the eigenvectors of the RMSD matrix",
 994     "diagonalization.[BR]",
 995     "[TT]-sz[tt] writes the cluster sizes.[BR]",
 996     "[TT]-tr[tt] writes a matrix of the number transitions between",
 997     "cluster pairs.[BR]",
 998     "[TT]-ntr[tt] writes the total number of transitions to or from",
 999     "each cluster.[BR]",
1000     "[TT]-clid[tt] writes the cluster number as a function of time.[BR]",
1001     "[TT]-cl[tt] writes average (with option [TT]-av[tt]) or central",
1002     "structure of each cluster or writes numbered files with cluster members",
1003     "for a selected set of clusters (with option [TT]-wcl[tt], depends on",
1004     "[TT]-nst[tt] and [TT]-rmsmin[tt]).[BR]",
1005   };
1006
1007   FILE         *fp,*log;
1008   int          i,i1,i2,j,nf,nrms;
1009
1010   matrix       box;
1011   rvec         *xtps,*usextps,*x1,**xx=NULL;
1012   const char   *fn,*trx_out_fn;
1013   t_clusters   clust;
1014   t_mat        *rms;
1015   real         *eigval;
1016   t_topology   top;
1017   int          ePBC;
1018   t_atoms      useatoms;
1019   t_matrix     *readmat=NULL;
1020   real         *tmp;
1021
1022   int      isize=0,ifsize=0,iosize=0;
1023   atom_id  *index=NULL, *fitidx, *outidx;
1024   char     *grpname;
1025   real     rmsd,**d1,**d2,*time=NULL,time_invfac,*mass=NULL;
1026   char     buf[STRLEN],buf1[80],title[STRLEN];
1027   bool     bAnalyze,bUseRmsdCut,bJP_RMSD=FALSE,bReadMat,bReadTraj;
1028
1029   int method,ncluster=0;
1030   static const char *methodname[] = {
1031     NULL, "linkage", "jarvis-patrick","monte-carlo",
1032     "diagonalization", "gromos", NULL
1033   };
1034   enum { m_null, m_linkage, m_jarvis_patrick,
1035          m_monte_carlo, m_diagonalize, m_gromos, m_nr };
1036   /* Set colors for plotting: white = zero RMS, black = maximum */
1037   static t_rgb rlo_top = { 1.0, 1.0, 1.0 };
1038   static t_rgb rhi_top = { 0.0, 0.0, 0.0 };
1039   static t_rgb rlo_bot = { 1.0, 1.0, 1.0 };
1040   static t_rgb rhi_bot = { 0.0, 0.0, 1.0 };
1041   static int  nlevels=40,skip=1;
1042   static real scalemax=-1.0,rmsdcut=0.1,rmsmin=0.0;
1043   static bool bRMSdist=FALSE,bBinary=FALSE,bAverage=FALSE,bFit=TRUE;
1044   static int  niter=10000,seed=1993,write_ncl=0,write_nst=1,minstruct=1;
1045   static real kT=1e-3;
1046   static int  M=10,P=3;
1047   output_env_t oenv;
1048   t_pargs pa[] = {
1049     { "-dista", FALSE, etBOOL, {&bRMSdist},
1050       "Use RMSD of distances instead of RMS deviation" },
1051     { "-nlevels",FALSE,etINT,  {&nlevels},
1052       "Discretize RMSD matrix in # levels" },
1053     { "-cutoff",FALSE, etREAL, {&rmsdcut},
1054       "RMSD cut-off (nm) for two structures to be neighbor" },
1055     { "-fit",   FALSE, etBOOL, {&bFit},
1056       "Use least squares fitting before RMSD calculation" },
1057     { "-max",   FALSE, etREAL, {&scalemax},
1058       "Maximum level in RMSD matrix" },
1059     { "-skip",  FALSE, etINT,  {&skip},
1060       "Only analyze every nr-th frame" },
1061     { "-av",    FALSE, etBOOL, {&bAverage},
1062       "Write average iso middle structure for each cluster" },
1063     { "-wcl",   FALSE, etINT,  {&write_ncl},
1064       "Write all structures for first # clusters to numbered files" },
1065     { "-nst",   FALSE, etINT,  {&write_nst},
1066       "Only write all structures if more than # per cluster" },
1067     { "-rmsmin",FALSE, etREAL, {&rmsmin},
1068       "minimum rms difference with rest of cluster for writing structures" },
1069     { "-method",FALSE, etENUM, {methodname},
1070       "Method for cluster determination" },
1071     { "-minstruct", FALSE, etINT, {&minstruct},
1072       "Minimum number of structures in cluster for coloring in the xpm file" },
1073     { "-binary",FALSE, etBOOL, {&bBinary},
1074       "Treat the RMSD matrix as consisting of 0 and 1, where the cut-off "
1075       "is given by -cutoff" },
1076     { "-M",     FALSE, etINT,  {&M},
1077       "Number of nearest neighbors considered for Jarvis-Patrick algorithm, "
1078       "0 is use cutoff" },
1079     { "-P",     FALSE, etINT,  {&P},
1080       "Number of identical nearest neighbors required to form a cluster" },
1081     { "-seed",  FALSE, etINT,  {&seed},
1082       "Random number seed for Monte Carlo clustering algorithm" },
1083     { "-niter", FALSE, etINT,  {&niter},
1084       "Number of iterations for MC" },
1085     { "-kT",    FALSE, etREAL, {&kT},
1086       "Boltzmann weighting factor for Monte Carlo optimization "
1087       "(zero turns off uphill steps)" }
1088   };
1089   t_filenm fnm[] = {
1090     { efTRX, "-f",     NULL,        ffOPTRD },
1091     { efTPS, "-s",     NULL,        ffOPTRD },
1092     { efNDX, NULL,     NULL,        ffOPTRD },
1093     { efXPM, "-dm",   "rmsd",       ffOPTRD },
1094     { efXPM, "-o",    "rmsd-clust", ffWRITE },
1095     { efLOG, "-g",    "cluster",    ffWRITE },
1096     { efXVG, "-dist", "rmsd-dist",  ffOPTWR },
1097     { efXVG, "-ev",   "rmsd-eig",   ffOPTWR },
1098     { efXVG, "-sz",   "clust-size", ffOPTWR},
1099     { efXPM, "-tr",   "clust-trans",ffOPTWR},
1100     { efXVG, "-ntr",  "clust-trans",ffOPTWR},
1101     { efXVG, "-clid", "clust-id.xvg",ffOPTWR},
1102     { efTRX, "-cl",   "clusters.pdb", ffOPTWR }
1103   };
1104 #define NFILE asize(fnm)
1105
1106   CopyRight(stderr,argv[0]);
1107   parse_common_args(&argc,argv,
1108                     PCA_CAN_VIEW | PCA_CAN_TIME | PCA_TIME_UNIT | PCA_BE_NICE,
1109                     NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,
1110                     &oenv);
1111
1112   /* parse options */
1113   bReadMat   = opt2bSet("-dm",NFILE,fnm);
1114   bReadTraj  = opt2bSet("-f",NFILE,fnm) || !bReadMat;
1115   if ( opt2parg_bSet("-av",asize(pa),pa) ||
1116        opt2parg_bSet("-wcl",asize(pa),pa) ||
1117        opt2parg_bSet("-nst",asize(pa),pa) ||
1118        opt2parg_bSet("-rmsmin",asize(pa),pa) ||
1119        opt2bSet("-cl",NFILE,fnm) )
1120     trx_out_fn = opt2fn("-cl",NFILE,fnm);
1121   else
1122     trx_out_fn = NULL;
1123   if (bReadMat && output_env_get_time_factor(oenv)!=1) {
1124     fprintf(stderr,
1125             "\nWarning: assuming the time unit in %s is %s\n",
1126             opt2fn("-dm",NFILE,fnm),output_env_get_time_unit(oenv));
1127   }
1128   if (trx_out_fn && !bReadTraj)
1129     fprintf(stderr,"\nWarning: "
1130             "cannot write cluster structures without reading trajectory\n"
1131             "         ignoring option -cl %s\n", trx_out_fn);
1132
1133   method=1;
1134   while ( method < m_nr && strcasecmp(methodname[0], methodname[method])!=0 )
1135     method++;
1136   if (method == m_nr)
1137     gmx_fatal(FARGS,"Invalid method");
1138
1139   bAnalyze = (method == m_linkage || method == m_jarvis_patrick ||
1140               method == m_gromos );
1141
1142   /* Open log file */
1143   log = ftp2FILE(efLOG,NFILE,fnm,"w");
1144
1145   fprintf(stderr,"Using %s method for clustering\n",methodname[0]);
1146   fprintf(log,"Using %s method for clustering\n",methodname[0]);
1147
1148   /* check input and write parameters to log file */
1149   bUseRmsdCut = FALSE;
1150   if (method == m_jarvis_patrick) {
1151     bJP_RMSD = (M == 0) || opt2parg_bSet("-cutoff",asize(pa),pa);
1152     if ((M<0) || (M == 1))
1153       gmx_fatal(FARGS,"M (%d) must be 0 or larger than 1",M);
1154     if (M < 2) {
1155       sprintf(buf1,"Will use P=%d and RMSD cutoff (%g)",P,rmsdcut);
1156       bUseRmsdCut = TRUE;
1157     } else {
1158       if (P >= M)
1159         gmx_fatal(FARGS,"Number of neighbors required (P) must be less than M");
1160       if (bJP_RMSD) {
1161         sprintf(buf1,"Will use P=%d, M=%d and RMSD cutoff (%g)",P,M,rmsdcut);
1162         bUseRmsdCut = TRUE;
1163       } else
1164         sprintf(buf1,"Will use P=%d, M=%d",P,M);
1165     }
1166     ffprintf1(stderr,log,buf,"%s for determining the neighbors\n\n",buf1);
1167   } else /* method != m_jarvis */
1168     bUseRmsdCut = ( bBinary || method == m_linkage || method == m_gromos );
1169   if (bUseRmsdCut && method != m_jarvis_patrick)
1170     fprintf(log,"Using RMSD cutoff %g nm\n",rmsdcut);
1171   if ( method==m_monte_carlo )
1172     fprintf(log,"Using %d iterations\n",niter);
1173
1174   if (skip < 1)
1175     gmx_fatal(FARGS,"skip (%d) should be >= 1",skip);
1176
1177   /* get input */
1178   if (bReadTraj) {
1179     /* don't read mass-database as masses (and top) are not used */
1180     read_tps_conf(ftp2fn(efTPS,NFILE,fnm),buf,&top,&ePBC,&xtps,NULL,box,
1181                   bAnalyze);
1182
1183     fprintf(stderr,"\nSelect group for least squares fit%s:\n",
1184             bReadMat?"":" and RMSD calculation");
1185     get_index(&(top.atoms),ftp2fn_null(efNDX,NFILE,fnm),
1186               1,&ifsize,&fitidx,&grpname);
1187     if (trx_out_fn) {
1188       fprintf(stderr,"\nSelect group for output:\n");
1189       get_index(&(top.atoms),ftp2fn_null(efNDX,NFILE,fnm),
1190                 1,&iosize,&outidx,&grpname);
1191       /* merge and convert both index groups: */
1192       /* first copy outidx to index. let outidx refer to elements in index */
1193       snew(index,iosize);
1194       isize = iosize;
1195       for(i=0; i<iosize; i++) {
1196         index[i]=outidx[i];
1197         outidx[i]=i;
1198       }
1199       /* now lookup elements from fitidx in index, add them if necessary
1200          and also let fitidx refer to elements in index */
1201       for(i=0; i<ifsize; i++) {
1202         j=0;
1203         while (j<isize && index[j]!=fitidx[i])
1204           j++;
1205         if (j>=isize) {
1206           /* slow this way, but doesn't matter much */
1207           isize++;
1208           srenew(index,isize);
1209         }
1210         index[j]=fitidx[i];
1211         fitidx[i]=j;
1212       }
1213     } else { /* !trx_out_fn */
1214       isize = ifsize;
1215       snew(index, isize);
1216       for(i=0; i<ifsize; i++) {
1217         index[i]=fitidx[i];
1218         fitidx[i]=i;
1219       }
1220     }
1221   }
1222   /* Initiate arrays */
1223   snew(d1,isize);
1224   snew(d2,isize);
1225   for(i=0; (i<isize); i++) {
1226     snew(d1[i],isize);
1227     snew(d2[i],isize);
1228   }
1229
1230   if (bReadTraj) {
1231     /* Loop over first coordinate file */
1232     fn = opt2fn("-f",NFILE,fnm);
1233
1234     xx = read_whole_trj(fn,isize,index,skip,&nf,&time,oenv);
1235     output_env_conv_times(oenv, nf, time);
1236     if (!bRMSdist || bAnalyze) {
1237       /* Center all frames on zero */
1238       snew(mass,isize);
1239       for(i=0; i<ifsize; i++)
1240         mass[fitidx[i]] = top.atoms.atom[index[fitidx[i]]].m;
1241       if (bFit)
1242       for(i=0; i<nf; i++)
1243         reset_x(ifsize,fitidx,isize,NULL,xx[i],mass);
1244     }
1245   }
1246   if (bReadMat) {
1247     fprintf(stderr,"Reading rms distance matrix ");
1248     read_xpm_matrix(opt2fn("-dm",NFILE,fnm),&readmat);
1249     fprintf(stderr,"\n");
1250     if (readmat[0].nx != readmat[0].ny)
1251       gmx_fatal(FARGS,"Matrix (%dx%d) is not square",
1252                   readmat[0].nx,readmat[0].ny);
1253     if (bReadTraj && bAnalyze && (readmat[0].nx != nf))
1254       gmx_fatal(FARGS,"Matrix size (%dx%d) does not match the number of "
1255                   "frames (%d)",readmat[0].nx,readmat[0].ny,nf);
1256
1257     nf = readmat[0].nx;
1258     sfree(time);
1259     time = readmat[0].axis_x;
1260     time_invfac = output_env_get_time_invfactor(oenv);
1261     for(i=0; i<nf; i++)
1262       time[i] *= time_invfac;
1263
1264     rms = init_mat(readmat[0].nx,method == m_diagonalize);
1265     convert_mat(&(readmat[0]),rms);
1266
1267     nlevels = readmat[0].nmap;
1268   } else { /* !bReadMat */
1269     rms = init_mat(nf,method == m_diagonalize);
1270     nrms = (nf*(nf-1))/2;
1271     if (!bRMSdist) {
1272       fprintf(stderr,"Computing %dx%d RMS deviation matrix\n",nf,nf);
1273       snew(x1,isize);
1274       for(i1=0; (i1<nf); i1++) {
1275         for(i2=i1+1; (i2<nf); i2++) {
1276           for(i=0; i<isize; i++)
1277             copy_rvec(xx[i1][i],x1[i]);
1278           if (bFit)
1279             do_fit(isize,mass,xx[i2],x1);
1280           rmsd = rmsdev(isize,mass,xx[i2],x1);
1281           set_mat_entry(rms,i1,i2,rmsd);
1282         }
1283         nrms -= (nf-i1-1);
1284         fprintf(stderr,"\r# RMSD calculations left: %d   ",nrms);
1285       }
1286     } else { /* bRMSdist */
1287       fprintf(stderr,"Computing %dx%d RMS distance deviation matrix\n",nf,nf);
1288       for(i1=0; (i1<nf); i1++) {
1289         calc_dist(isize,xx[i1],d1);
1290         for(i2=i1+1; (i2<nf); i2++) {
1291           calc_dist(isize,xx[i2],d2);
1292           set_mat_entry(rms,i1,i2,rms_dist(isize,d1,d2));
1293       }
1294         nrms -= (nf-i1-1);
1295         fprintf(stderr,"\r# RMSD calculations left: %d   ",nrms);
1296       }
1297     }
1298     fprintf(stderr,"\n\n");
1299   }
1300   ffprintf2(stderr,log,buf,"The RMSD ranges from %g to %g nm\n",
1301             rms->minrms,rms->maxrms);
1302   ffprintf1(stderr,log,buf,"Average RMSD is %g\n",2*rms->sumrms/(nf*(nf-1)));
1303   ffprintf1(stderr,log,buf,"Number of structures for matrix %d\n",nf);
1304   ffprintf1(stderr,log,buf,"Energy of the matrix is %g nm\n",mat_energy(rms));
1305   if (bUseRmsdCut && (rmsdcut < rms->minrms || rmsdcut > rms->maxrms) )
1306     fprintf(stderr,"WARNING: rmsd cutoff %g is outside range of rmsd values "
1307             "%g to %g\n",rmsdcut,rms->minrms,rms->maxrms);
1308   if (bAnalyze && (rmsmin < rms->minrms) )
1309     fprintf(stderr,"WARNING: rmsd minimum %g is below lowest rmsd value %g\n",
1310             rmsmin,rms->minrms);
1311   if (bAnalyze && (rmsmin > rmsdcut) )
1312     fprintf(stderr,"WARNING: rmsd minimum %g is above rmsd cutoff %g\n",
1313             rmsmin,rmsdcut);
1314
1315   /* Plot the rmsd distribution */
1316   rmsd_distribution(opt2fn("-dist",NFILE,fnm),rms,oenv);
1317
1318   if (bBinary) {
1319     for(i1=0; (i1 < nf); i1++)
1320       for(i2=0; (i2 < nf); i2++)
1321         if (rms->mat[i1][i2] < rmsdcut)
1322           rms->mat[i1][i2] = 0;
1323         else
1324           rms->mat[i1][i2] = 1;
1325   }
1326
1327   snew(clust.cl,nf);
1328   switch (method) {
1329   case m_linkage:
1330     /* Now sort the matrix and write it out again */
1331     gather(rms,rmsdcut,&clust);
1332     break;
1333   case m_diagonalize:
1334     /* Do a diagonalization */
1335       snew(eigval,nf);
1336       snew(tmp,nf*nf);
1337       memcpy(tmp,rms->mat[0],nf*nf*sizeof(real));
1338       eigensolver(tmp,nf,0,nf,eigval,rms->mat[0]);
1339       sfree(tmp);
1340
1341       fp = xvgropen(opt2fn("-ev",NFILE,fnm),"RMSD matrix Eigenvalues",
1342                     "Eigenvector index","Eigenvalues (nm\\S2\\N)",oenv);
1343       for(i=0; (i<nf); i++)
1344           fprintf(fp,"%10d  %10g\n",i,eigval[i]);
1345           ffclose(fp);
1346       break;
1347   case m_monte_carlo:
1348     mc_optimize(log,rms,niter,&seed,kT);
1349     swap_mat(rms);
1350     reset_index(rms);
1351     break;
1352   case m_jarvis_patrick:
1353     jarvis_patrick(rms->nn,rms->mat,M,P,bJP_RMSD ? rmsdcut : -1,&clust);
1354     break;
1355   case m_gromos:
1356     gromos(rms->nn,rms->mat,rmsdcut,&clust);
1357     break;
1358   default:
1359     gmx_fatal(FARGS,"DEATH HORROR unknown method \"%s\"",methodname[0]);
1360   }
1361
1362   if (method == m_monte_carlo || method == m_diagonalize)
1363     fprintf(stderr,"Energy of the matrix after clustering is %g nm\n",
1364             mat_energy(rms));
1365
1366   if (bAnalyze) {
1367     if (minstruct > 1) {
1368       ncluster = plot_clusters(nf,rms->mat,&clust,nlevels,minstruct);
1369     } else {
1370       mark_clusters(nf,rms->mat,rms->maxrms,&clust);
1371     }
1372     init_t_atoms(&useatoms,isize,FALSE);
1373     snew(usextps, isize);
1374     useatoms.resinfo = top.atoms.resinfo;
1375     for(i=0; i<isize; i++) {
1376       useatoms.atomname[i]=top.atoms.atomname[index[i]];
1377       useatoms.atom[i].resind = top.atoms.atom[index[i]].resind;
1378       useatoms.nres = max(useatoms.nres,useatoms.atom[i].resind+1);
1379       copy_rvec(xtps[index[i]],usextps[i]);
1380     }
1381     useatoms.nr=isize;
1382     analyze_clusters(nf,&clust,rms->mat,isize,&useatoms,usextps,mass,xx,time,
1383                      ifsize,fitidx,iosize,outidx,
1384                      bReadTraj?trx_out_fn:NULL,
1385                      opt2fn_null("-sz",NFILE,fnm),
1386                      opt2fn_null("-tr",NFILE,fnm),
1387                      opt2fn_null("-ntr",NFILE,fnm),
1388                      opt2fn_null("-clid",NFILE,fnm),
1389                      bAverage, write_ncl, write_nst, rmsmin, bFit, log,
1390                      rlo_bot,rhi_bot,oenv);
1391   }
1392   ffclose(log);
1393
1394   if (bBinary && !bAnalyze)
1395     /* Make the clustering visible */
1396     for(i2=0; (i2 < nf); i2++)
1397        for(i1=i2+1; (i1 < nf); i1++)
1398          if (rms->mat[i1][i2])
1399            rms->mat[i1][i2] = rms->maxrms;
1400
1401   fp = opt2FILE("-o",NFILE,fnm,"w");
1402   fprintf(stderr,"Writing rms distance/clustering matrix ");
1403   if (bReadMat) {
1404     write_xpm(fp,0,readmat[0].title,readmat[0].legend,readmat[0].label_x,
1405               readmat[0].label_y,nf,nf,readmat[0].axis_x,readmat[0].axis_y,
1406               rms->mat,0.0,rms->maxrms,rlo_top,rhi_top,&nlevels);
1407   }
1408   else {
1409     sprintf(buf,"Time (%s)",output_env_get_time_unit(oenv));
1410     sprintf(title,"RMS%sDeviation / Cluster Index",
1411             bRMSdist ? " Distance " : " ");
1412     if (minstruct > 1) {
1413       write_xpm_split(fp,0,title,"RMSD (nm)",buf,buf,
1414                       nf,nf,time,time,rms->mat,0.0,rms->maxrms,&nlevels,
1415                       rlo_top,rhi_top,0.0,(real) ncluster,
1416                       &ncluster,TRUE,rlo_bot,rhi_bot);
1417     } else {
1418       write_xpm(fp,0,title,"RMSD (nm)",buf,buf,
1419                 nf,nf,time,time,rms->mat,0.0,rms->maxrms,
1420                 rlo_top,rhi_top,&nlevels);
1421     }
1422   }
1423   fprintf(stderr,"\n");
1424   ffclose(fp);
1425
1426   /* now show what we've done */
1427   do_view(oenv,opt2fn("-o",NFILE,fnm),"-nxy");
1428   do_view(oenv,opt2fn_null("-sz",NFILE,fnm),"-nxy");
1429   if (method == m_diagonalize)
1430     do_view(oenv,opt2fn_null("-ev",NFILE,fnm),"-nxy");
1431   do_view(oenv,opt2fn("-dist",NFILE,fnm),"-nxy");
1432   if (bAnalyze) {
1433     do_view(oenv,opt2fn_null("-tr",NFILE,fnm),"-nxy");
1434     do_view(oenv,opt2fn_null("-ntr",NFILE,fnm),"-nxy");
1435     do_view(oenv,opt2fn_null("-clid",NFILE,fnm),"-nxy");
1436   }
1437
1438   /* Thank the user for her patience */
1439   thanx(stderr);
1440
1441   return 0;
1442 }