src/gmxlib/index.c

   1 /*
   2  *
   3  *                This source code is part of
   4  *
   5  *                 G   R   O   M   A   C   S
   6  *
   7  *          GROningen MAchine for Chemical Simulations
   8  *
   9  *                        VERSION 3.2.0
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2004, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * GROningen Mixture of Alchemy and Childrens' Stories
  34  */
  35 #ifdef HAVE_CONFIG_H
  36 #include <config.h>
  37 #endif
  38
  39 #include <ctype.h>
  40 #include <string.h>
  41 #include "sysstuff.h"
  42 #include "strdb.h"
  43 #include "futil.h"
  44 #include "macros.h"
  45 #include "names.h"
  46 #include "string2.h"
  47 #include "statutil.h"
  48 #include "confio.h"
  49 #include "main.h"
  50 #include "copyrite.h"
  51 #include "typedefs.h"
  52 #include "smalloc.h"
  53 #include "invblock.h"
  54 #include "macros.h"
  55 #include "index.h"
  56 #include "txtdump.h"
  57 #include "gmxfio.h"
  58
  59 typedef enum { etOther, etProt, etDNA, etRNA, erestNR } eRestp;
  60 static const char *ResTP[erestNR] = { "OTHER", "PROTEIN", "DNA", "RNA" };
  61
  62 static const char *DNA[]          = { "DA", "DT", "DG", "DC", "DU" };
  63 #define  NDNA asize(DNA)
  64
  65 static const char *RNA[]          = {  "A",  "T",  "G",  "C",  "U" };
  66 #define  NRNA asize(RNA)
  67
  68 static const char *Negate[] = { "SOL" };
  69 #define  NNEGATE asize(Negate)
  70
  71 static bool gmx_ask_yesno(bool bASK)
  72 {
  73   char c;
  74
  75   if (bASK) {
  76     do {
  77       c=toupper(fgetc(stdin));
  78     } while ((c != 'Y') && (c != 'N'));
  79
  80     return (c == 'Y');
  81   }
  82   else
  83     return FALSE;
  84 }
  85
  86 t_blocka *new_blocka(void)
  87 {
  88   t_blocka *block;
  89
  90   snew(block,1);
  91   snew(block->index,1);
  92
  93   return block;
  94 }
  95
  96 void write_index(const char *outf, t_blocka *b,char **gnames)
  97 {
  98   FILE *out;
  99   int  i,j,k;
 100
 101   out=gmx_fio_fopen(outf,"w");
 102   /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
 103   for(i=0; (i<b->nr); i++) {
 104     fprintf(out,"[ %s ]\n",gnames[i]);
 105     for(k=0,j=b->index[i]; j<b->index[i+1]; j++,k++) {
 106       fprintf(out,"%4d ",b->a[j]+1);
 107       if ((k % 15) == 14)
 108         fprintf(out,"\n");
 109     }
 110     fprintf(out,"\n");
 111   }
 112   gmx_fio_fclose(out);
 113 }
 114
 115 void add_grp(t_blocka *b,char ***gnames,int nra,atom_id a[],const char *name)
 116 {
 117   int i;
 118
 119   srenew(b->index,b->nr+2);
 120   srenew(*gnames,b->nr+1);
 121   (*gnames)[b->nr]=strdup(name);
 122
 123   srenew(b->a,b->nra+nra);
 124   for(i=0; (i<nra); i++)
 125     b->a[b->nra++]=a[i];
 126   b->nr++;
 127   b->index[b->nr]=b->nra;
 128 }
 129
 130 /* compare index in `a' with group in `b' at `index',
 131    when `index'<0 it is relative to end of `b' */
 132 static bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 133 {
 134   int i;
 135
 136   if (index < 0)
 137     index = b->nr-1+index;
 138   if (index >= b->nr)
 139     gmx_fatal(FARGS,"no such index group %d in t_blocka (nr=%d)",index,b->nr);
 140   /* compare sizes */
 141   if ( nra != b->index[index+1] - b->index[index] )
 142     return FALSE;
 143   for(i=0; i<nra; i++)
 144     if ( a[i] != b->a[b->index[index]+i] )
 145       return FALSE;
 146   return TRUE;
 147 }
 148
 149 static void p_status(int nres,eRestp restp[],bool bVerb)
 150 {
 151   int i,j,ntp[erestNR];
 152
 153   for(i=0; (i<erestNR); i++)
 154     ntp[i]=0;
 155   for(j=0; (j<nres); j++)
 156     ntp[restp[j]]++;
 157
 158   if (bVerb)
 159     for(i=0; (i<erestNR); i++)
 160       printf("There are: %5d %10s residues\n",ntp[i],ResTP[i]);
 161 }
 162
 163 atom_id *mk_aid(t_atoms *atoms,eRestp restp[],eRestp res,int *nra,
 164                 bool bTrue)
 165 /* Make an array of atom_ids for all atoms with:
 166  * (restp[i] == res) == bTrue
 167  */
 168 {
 169   atom_id *a;
 170   int     i;
 171
 172   snew(a,atoms->nr);
 173   *nra=0;
 174   for(i=0; (i<atoms->nr); i++)
 175     if ((restp[atoms->atom[i].resind] == res) == bTrue)
 176       a[(*nra)++]=i;
 177
 178   return a;
 179 }
 180
 181 typedef struct {
 182   char *rname;
 183   bool bNeg;
 184   char *gname;
 185 } restp_t;
 186
 187 static void analyse_other(eRestp Restp[],t_atoms *atoms,
 188                           t_blocka *gb,char ***gn,bool bASK,bool bVerb)
 189 {
 190   restp_t *restp=NULL;
 191   char **attp=NULL;
 192   char *rname,*aname;
 193   atom_id *other_ndx,*aid,*aaid;
 194   int  i,j,k,l,resind,naid,naaid,natp,nrestp=0;
 195
 196   for(i=0; (i<atoms->nres); i++)
 197     if (Restp[i] == etOther)
 198       break;
 199   if (i < atoms->nres) {
 200     /* we have others */
 201     if (bVerb)
 202       printf("Analysing Other...\n");
 203     snew(other_ndx,atoms->nr);
 204     for(k=0; (k<atoms->nr); k++) {
 205       resind = atoms->atom[k].resind;
 206       rname = *atoms->resinfo[resind].name;
 207       if (Restp[resind] == etOther) {
 208         for(l=0; (l<nrestp); l++)
 209           if (strcmp(restp[l].rname,rname) == 0)
 210             break;
 211         if (l==nrestp) {
 212           srenew(restp,nrestp+1);
 213           restp[nrestp].rname = strdup(rname);
 214           restp[nrestp].bNeg  = FALSE;
 215           restp[nrestp].gname = strdup(rname);
 216           nrestp++;
 217           for(i=0; i<NNEGATE; i++) {
 218             if (strcmp(rname,Negate[i]) == 0) {
 219               srenew(restp,nrestp+1);
 220               restp[nrestp].rname = strdup(rname);
 221               restp[nrestp].bNeg  = TRUE;
 222               snew(restp[nrestp].gname,4+strlen(rname)+1);
 223               sprintf(restp[nrestp].gname,"%s%s","non-",rname);
 224               nrestp++;
 225             }
 226           }
 227         }
 228       }
 229     }
 230     for(i=0; (i<nrestp); i++) {
 231       snew(aid,atoms->nr);
 232       naid=0;
 233       for(j=0; (j<atoms->nr); j++) {
 234         rname = *atoms->resinfo[atoms->atom[j].resind].name;
 235         if ((strcmp(restp[i].rname,rname) == 0 && !restp[i].bNeg) ||
 236             (strcmp(restp[i].rname,rname) != 0 &&  restp[i].bNeg)) {
 237           aid[naid++] = j;
 238         }
 239       }
 240       add_grp(gb,gn,naid,aid,restp[i].gname);
 241       if (bASK) {
 242         printf("split %s into atoms (y/n) ? ",restp[i].gname);
 243         fflush(stdout);
 244         if (gmx_ask_yesno(bASK)) {
 245           natp=0;
 246           for(k=0; (k<naid); k++) {
 247             aname=*atoms->atomname[aid[k]];
 248             for(l=0; (l<natp); l++)
 249               if (strcmp(aname,attp[l]) == 0)
 250                 break;
 251             if (l == natp) {
 252               srenew(attp,++natp);
 253               attp[natp-1]=aname;
 254             }
 255           }
 256           if (natp > 1) {
 257             for(l=0; (l<natp); l++) {
 258               snew(aaid,naid);
 259               naaid=0;
 260               for(k=0; (k<naid); k++) {
 261                 aname=*atoms->atomname[aid[k]];
 262                 if (strcmp(aname,attp[l])==0)
 263                   aaid[naaid++]=aid[k];
 264               }
 265               add_grp(gb,gn,naaid,aaid,attp[l]);
 266               sfree(aaid);
 267             }
 268           }
 269           sfree(attp);
 270           attp=NULL;
 271         }
 272         sfree(aid);
 273       }
 274     }
 275     sfree(other_ndx);
 276   }
 277 }
 278
 279 static void analyse_prot(eRestp restp[],t_atoms *atoms,
 280                          t_blocka *gb,char ***gn,bool bASK,bool bVerb)
 281 {
 282   /* atomnames to be used in constructing index groups: */
 283   static const char *pnoh[]    = { "H" };
 284   static const char *pnodum[]  = { "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 285                              "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2" };
 286   static const char *calpha[]  = { "CA" };
 287   static const char *bb[]      = { "N","CA","C" };
 288   static const char *mc[]      = { "N","CA","C","O","O1","O2","OXT" };
 289   static const char *mcb[]     = { "N","CA","CB","C","O","O1","O2","OT","OXT" };
 290   static const char *mch[]     = { "N","CA","C","O","O1","O2","OT","OXT",
 291                              "H1","H2","H3","H" };
 292   /* array of arrays of atomnames: */
 293   static const char **chains[] = { NULL,pnoh,calpha,bb,mc,mcb,mch,mch,mch,pnodum };
 294 #define NCH asize(chains)
 295   /* array of sizes of arrays of atomnames: */
 296   const int       sizes[NCH] = {
 297     0, asize(pnoh), asize(calpha), asize(bb),
 298     asize(mc), asize(mcb), asize(mch), asize(mch), asize(mch), asize(pnodum)
 299   };
 300   /* descriptive names of index groups */
 301   const char   *ch_name[NCH] = {
 302     "Protein", "Protein-H", "C-alpha", "Backbone",
 303     "MainChain", "MainChain+Cb", "MainChain+H", "SideChain", "SideChain-H",
 304     "Prot-Masses"
 305   };
 306   /* construct index group containing (TRUE) or excluding (FALSE)
 307      given atom names */
 308   const bool complement[NCH] = {
 309     TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE
 310   };
 311   const int  wholename[NCH]  = { -1, 0,-1,-1,-1,-1,-1,-1, 11,-1 };
 312   /* the index in wholename gives the first item in the arrays of
 313    * atomtypes that should be tested with 'strncasecmp' in stead of
 314    * strcasecmp, or -1 if all items should be tested with strcasecmp
 315    * This is comparable to using a '*' wildcard at the end of specific
 316    * atom names, but that is more involved to implement...
 317    */
 318   /* only add index group if it differs from the specified one,
 319      specify -1 to always add group */
 320   const int compareto[NCH] = { -1,-1,-1,-1,-1,-1,-1,-1,-1, 0 };
 321
 322   int     n,j;
 323   atom_id *aid;
 324   int     nra,nnpres,npres;
 325   bool    match;
 326   char    ndx_name[STRLEN],*atnm;
 327   int i;
 328
 329   if (bVerb)
 330     printf("Analysing Protein...\n");
 331   snew(aid,atoms->nr);
 332
 333   /* calculate the number of protein residues */
 334   npres=0;
 335   for(i=0; (i<atoms->nres); i++)
 336     if (restp[i] == etProt)
 337       npres++;
 338
 339   /* find matching or complement atoms */
 340   for(i=0; (i<(int)NCH); i++) {
 341     nra=0;
 342     for(n=0; (n<atoms->nr); n++) {
 343       if (restp[atoms->atom[n].resind] == etProt) {
 344         match=FALSE;
 345         for(j=0; (j<sizes[i]); j++) {
 346           /* skip digits at beginning of atomname, e.g. 1H */
 347           atnm=*atoms->atomname[n];
 348           while (isdigit(atnm[0]))
 349             atnm++;
 350           if ( (wholename[i]==-1) || (j<wholename[i]) ) {
 351             if (strcasecmp(chains[i][j],atnm) == 0)
 352               match=TRUE;
 353           } else {
 354             if (strncasecmp(chains[i][j],atnm,strlen(chains[i][j])) == 0)
 355               match=TRUE;
 356           }
 357         }
 358         if (match != complement[i])
 359           aid[nra++]=n;
 360       }
 361     }
 362     /* if we want to add this group always or it differs from previous
 363        group, add it: */
 364     if ( compareto[i] == -1 || !grp_cmp(gb,nra,aid,compareto[i]-i) )
 365       add_grp(gb,gn,nra,aid,ch_name[i]);
 366   }
 367
 368   if (bASK) {
 369     for(i=0; (i<(int)NCH); i++) {
 370       printf("Split %12s into %5d residues (y/n) ? ",ch_name[i],npres);
 371       if (gmx_ask_yesno(bASK)) {
 372         int resind;
 373         nra = 0;
 374         for(n=0;((atoms->atom[n].resind < npres) && (n<atoms->nr));) {
 375           resind = atoms->atom[n].resind;
 376           for(;((atoms->atom[n].resind==resind) && (n<atoms->nr));n++) {
 377             match=FALSE;
 378             for(j=0;(j<sizes[i]); j++)
 379               if (strcasecmp(chains[i][j],*atoms->atomname[n]) == 0)
 380                 match=TRUE;
 381             if (match != complement[i])
 382               aid[nra++]=n;
 383           }
 384           /* copy the residuename to the tail of the groupname */
 385           if (nra > 0) {
 386             t_resinfo *ri;
 387             ri = &atoms->resinfo[resind];
 388             sprintf(ndx_name,"%s_%s%d%c",
 389                     ch_name[i],*ri->name,ri->nr,ri->ic==' ' ? '\0' : ri->ic);
 390             add_grp(gb,gn,nra,aid,ndx_name);
 391             nra = 0;
 392           }
 393         }
 394       }
 395     }
 396     printf("Make group with sidechain and C=O swapped (y/n) ? ");
 397     if (gmx_ask_yesno(bASK)) {
 398       /* Make swap sidechain C=O index */
 399       int resind,hold;
 400       nra = 0;
 401       for(n=0;((atoms->atom[n].resind < npres) && (n<atoms->nr));) {
 402         resind = atoms->atom[n].resind;
 403         hold  = -1;
 404         for(;((atoms->atom[n].resind==resind) && (n<atoms->nr));n++)
 405           if (strcmp("CA",*atoms->atomname[n]) == 0) {
 406             aid[nra++]=n;
 407             hold=nra;
 408             nra+=2;
 409           } else if (strcmp("C",*atoms->atomname[n]) == 0) {
 410             if (hold == -1)
 411               gmx_incons("Atom naming problem");
 412             aid[hold]=n;
 413           } else if (strcmp("O",*atoms->atomname[n]) == 0) {
 414             if (hold == -1)
 415               gmx_incons("Atom naming problem");
 416             aid[hold+1]=n;
 417           } else if (strcmp("O1",*atoms->atomname[n]) == 0) {
 418             if (hold == -1)
 419               gmx_incons("Atom naming problem");
 420             aid[hold+1]=n;
 421           } else
 422             aid[nra++]=n;
 423       }
 424       /* copy the residuename to the tail of the groupname */
 425       if (nra > 0) {
 426         add_grp(gb,gn,nra,aid,"SwapSC-CO");
 427         nra = 0;
 428       }
 429     }
 430   }
 431   sfree(aid);
 432 }
 433
 434 static void analyse_dna(eRestp restp[],t_atoms *atoms,
 435                         t_blocka *gb,char ***gn,bool bASK,bool bVerb)
 436 {
 437   if (bVerb)
 438     printf("Analysing DNA... (not really)\n");
 439   if (debug)
 440     printf("eRestp %p; atoms %p; gb %p; gn %p; bASK %s; bASK %s",
 441            (void *)restp, (void *)atoms, (void *)gb, (void *)gn,
 442            bool_names[bASK], bool_names[bVerb]);
 443 }
 444
 445 t_aa_names *get_aa_names(void)
 446 {
 447   /* Read the database in aminoacids.dat */
 448   t_aa_names *aan;
 449
 450   snew(aan,1);
 451
 452   aan->n = get_strings("aminoacids.dat",&(aan->aa));
 453   /* qsort(aan->aa,aan->n,sizeof(aan->aa[0]),strcmp);*/
 454
 455   return aan;
 456 }
 457
 458 bool is_residue(t_aa_names *aan,char *resnm)
 459 {
 460   /* gives true if resnm occurs in aminoacids.dat */
 461   int i;
 462
 463   for(i=0; i<aan->n; i++) {
 464     if (strcasecmp(aan->aa[i],resnm) == 0) {
 465       return TRUE;
 466     }
 467   }
 468   for(i=0; i<NDNA; i++) {
 469     if (strcasecmp(DNA[i],resnm) == 0) {
 470       return TRUE;
 471     }
 472   }
 473   for(i=0; i<NRNA; i++) {
 474     if (strcasecmp(RNA[i],resnm) == 0) {
 475       return TRUE;
 476     }
 477   }
 478
 479   return FALSE;
 480 }
 481
 482 bool is_protein(t_aa_names *aan,char *resnm)
 483 {
 484   /* gives true if resnm occurs in aminoacids.dat */
 485   int i;
 486
 487   for(i=0; (i<aan->n); i++)
 488     if (strcasecmp(aan->aa[i],resnm) == 0)
 489       return TRUE;
 490
 491   return FALSE;
 492 }
 493
 494 void done_aa_names(t_aa_names **aan)
 495 {
 496   /* Free memory */
 497   int i;
 498
 499   for(i=0; (i<(*aan)->n); i++)
 500     sfree((*aan)->aa[i]);
 501   sfree((*aan)->aa);
 502   sfree(*aan);
 503   *aan = NULL;
 504 }
 505
 506 void analyse(t_atoms *atoms,t_blocka *gb,char ***gn,bool bASK,bool bVerb)
 507 {
 508   t_aa_names *aan;
 509   eRestp  *restp;
 510   char    *resnm;
 511   atom_id *aid;
 512   int     nra;
 513   int     i;
 514   size_t  j;
 515
 516   if (bVerb)
 517     printf("Analysing residue names:\n");
 518   snew(restp,atoms->nres);
 519   aid=mk_aid(atoms,restp,etOther,&nra,TRUE);
 520   add_grp(gb,gn,nra,aid,"System");
 521   sfree(aid);
 522
 523   aan = get_aa_names();
 524   for(i=0; (i<atoms->nres); i++) {
 525     resnm = *atoms->resinfo[i].name;
 526     if ((restp[i] == etOther) && is_protein(aan,resnm))
 527       restp[i] = etProt;
 528     if (restp[i] == etOther) {
 529       for(j=0; (j<NDNA);  j++) {
 530         if (strcasecmp(DNA[j],resnm) == 0)
 531           restp[i] = etDNA;
 532       }
 533       for(j=0; (j<NRNA);  j++) {
 534         if (strcasecmp(RNA[j],resnm) == 0)
 535           restp[i] = etRNA;
 536       }
 537     }
 538   }
 539   p_status(atoms->nres,restp,bVerb);
 540   done_aa_names(&aan);
 541
 542   /* Protein */
 543   aid=mk_aid(atoms,restp,etProt,&nra,TRUE);
 544   if (nra > 0)
 545     analyse_prot(restp,atoms,gb,gn,bASK,bVerb);
 546
 547   sfree(aid);
 548
 549   /* Non-Protein */
 550   aid=mk_aid(atoms,restp,etProt,&nra,FALSE);
 551   if ((nra > 0) && (nra < atoms->nr))
 552     add_grp(gb,gn,nra,aid,"non-Protein");
 553   sfree(aid);
 554
 555   /* DNA */
 556   aid=mk_aid(atoms,restp,etDNA,&nra,TRUE);
 557   if (nra > 0) {
 558     add_grp(gb,gn,nra,aid,"DNA");
 559     analyse_dna(restp,atoms,gb,gn,bASK,bVerb);
 560   }
 561   sfree(aid);
 562
 563   /* RNA */
 564   aid=mk_aid(atoms,restp,etRNA,&nra,TRUE);
 565   if (nra > 0) {
 566     add_grp(gb,gn,nra,aid,"RNA");
 567     analyse_dna(restp,atoms,gb,gn,bASK,bVerb);
 568   }
 569   sfree(aid);
 570
 571   /* Other */
 572   analyse_other(restp,atoms,gb,gn,bASK,bVerb);
 573
 574   sfree(restp);
 575 }
 576
 577 void check_index(char *gname,int n,atom_id index[],char *traj,int natoms)
 578 {
 579   int i;
 580
 581   for(i=0; i<n; i++)
 582     if (index[i] >= natoms)
 583       gmx_fatal(FARGS,"%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 584                   gname ? gname : "Index",i+1, index[i]+1,
 585                   traj ? traj : "the trajectory",natoms);
 586     else if (index[i] < 0)
 587       gmx_fatal(FARGS,"%s atom number (index[%d]=%d) is less than zero",
 588                 gname ? gname : "Index",i+1, index[i]+1);
 589 }
 590
 591 t_blocka *init_index(const char *gfile, char ***grpname)
 592 {
 593   FILE     *in;
 594   t_blocka  *b;
 595   int      a,maxentries;
 596   int      i,j,ng,nread;
 597   char     line[STRLEN],*pt,str[STRLEN];
 598
 599   in=gmx_fio_fopen(gfile,"r");
 600   snew(b,1);
 601   get_a_line(in,line,STRLEN);
 602   if ( line[0]=='[' ) {
 603     /* new format */
 604     b->nr=0;
 605     b->index=NULL;
 606     b->nra=0;
 607     b->a=NULL;
 608     *grpname=NULL;
 609     maxentries=0;
 610     do {
 611       if (get_header(line,str)) {
 612         b->nr++;
 613         srenew(b->index,b->nr+1);
 614         srenew(*grpname,b->nr);
 615         if (b->nr==1)
 616           b->index[0]=0;
 617         b->index[b->nr]=b->index[b->nr-1];
 618         (*grpname)[b->nr-1]=strdup(str);
 619       } else {
 620         pt=line;
 621         while ((i=sscanf(pt,"%s",str)) == 1) {
 622           i=b->index[b->nr];
 623           if (i>=maxentries) {
 624             maxentries+=1024;
 625             srenew(b->a,maxentries);
 626           }
 627           b->a[i]=strtol(str, NULL, 10)-1;
 628           b->index[b->nr]++;
 629           (b->nra)++;
 630           pt=strstr(pt,str)+strlen(str);
 631         }
 632       }
 633     } while (get_a_line(in,line,STRLEN));
 634   }
 635   else {
 636     /* old format */
 637     sscanf(line,"%d%d",&b->nr,&b->nra);
 638     snew(b->index,b->nr+1);
 639     snew(*grpname,b->nr);
 640     b->index[0]=0;
 641     snew(b->a,b->nra);
 642     for (i=0; (i<b->nr); i++) {
 643       nread=fscanf(in,"%s%d",str,&ng);
 644       (*grpname)[i]=strdup(str);
 645       b->index[i+1]=b->index[i]+ng;
 646       if (b->index[i+1] > b->nra)
 647         gmx_fatal(FARGS,"Something wrong in your indexfile at group %s",str);
 648       for(j=0; (j<ng); j++) {
 649         nread=fscanf(in,"%d",&a);
 650         b->a[b->index[i]+j]=a;
 651       }
 652     }
 653   }
 654   gmx_fio_fclose(in);
 655
 656   for(i=0; (i<b->nr); i++) {
 657     for(j=b->index[i]; (j<b->index[i+1]); j++) {
 658       if (b->a[j] < 0)
 659         fprintf(stderr,"\nWARNING: negative index %d in group %s\n\n",
 660                 b->a[j],(*grpname)[i]);
 661     }
 662   }
 663
 664   return b;
 665 }
 666
 667 static void minstring(char *str)
 668 {
 669   int i;
 670
 671   for (i=0; (i < (int)strlen(str)); i++)
 672     if (str[i]=='-')
 673       str[i]='_';
 674 }
 675
 676 int find_group(char s[], int ngrps, char **grpname)
 677 {
 678   int aa, i, n;
 679   char string[STRLEN];
 680   bool bMultiple;
 681
 682   bMultiple = FALSE;
 683   n = strlen(s);
 684   aa=NOTSET;
 685   /* first look for whole name match */
 686   if (aa==NOTSET)
 687     for(i=0; i<ngrps; i++)
 688       if (strcasecmp_min(s,grpname[i])==0) {
 689         if(aa!=NOTSET)
 690           bMultiple = TRUE;
 691         aa=i;
 692       }
 693   /* second look for first string match */
 694   if (aa==NOTSET)
 695     for(i=0; i<ngrps; i++)
 696       if (strncasecmp_min(s,grpname[i],n)==0) {
 697         if(aa!=NOTSET)
 698           bMultiple = TRUE;
 699         aa=i;
 700       }
 701   /* last look for arbitrary substring match */
 702   if (aa==NOTSET) {
 703     upstring(s);
 704     minstring(s);
 705     for(i=0; i<ngrps; i++) {
 706       strcpy(string, grpname[i]);
 707       upstring(string);
 708       minstring(string);
 709       if (strstr(string,s)!=NULL) {
 710         if(aa!=NOTSET)
 711           bMultiple = TRUE;
 712         aa=i;
 713       }
 714     }
 715   }
 716   if (bMultiple) {
 717     printf("Error: Multiple groups '%s' selected\n", s);
 718     aa=NOTSET;
 719   }
 720   return aa;
 721 }
 722
 723 static int qgroup(int *a, int ngrps, char **grpname)
 724 {
 725     char s[STRLEN];
 726     int  aa;
 727     bool bInRange;
 728     char *end;
 729
 730     do {
 731         fprintf(stderr,"Select a group: ");
 732         do {
 733             if ( scanf("%s",s)!=1 )
 734                 gmx_fatal(FARGS,"Cannot read from input");
 735             trim(s); /* remove spaces */
 736         } while (strlen(s)==0);
 737         aa = strtol(s, &end, 10);
 738         if (aa==0 && end[0] != '\0') /* string entered */
 739             aa = find_group(s, ngrps, grpname);
 740         bInRange = (aa >= 0 && aa < ngrps);
 741         if (!bInRange)
 742             printf("Error: No such group '%s'\n", s);
 743     } while (!bInRange);
 744     printf("Selected %d: '%s'\n", aa, grpname[aa]);
 745     *a = aa;
 746     return aa;
 747 }
 748
 749 static void rd_groups(t_blocka *grps,char **grpname,char *gnames[],
 750                       int ngrps,int isize[],atom_id *index[],int grpnr[])
 751 {
 752   int i,j,gnr1;
 753
 754   if (grps->nr==0)
 755     gmx_fatal(FARGS,"Error: no groups in indexfile");
 756   for(i=0; (i<grps->nr); i++)
 757     fprintf(stderr,"Group %5d (%12s) has %5d elements\n",i,grpname[i],
 758            grps->index[i+1]-grps->index[i]);
 759   for(i=0; (i<ngrps); i++) {
 760     if (grps->nr > 1)
 761       do {
 762         gnr1=qgroup(&grpnr[i], grps->nr, grpname);
 763         if ((gnr1<0) || (gnr1>=grps->nr))
 764           fprintf(stderr,"Select between %d and %d.\n",0,grps->nr-1);
 765       } while ((gnr1<0) || (gnr1>=grps->nr));
 766     else {
 767       fprintf(stderr,"There is one group in the index\n");
 768       gnr1=0;
 769     }
 770     gnames[i]=strdup(grpname[gnr1]);
 771     isize[i]=grps->index[gnr1+1]-grps->index[gnr1];
 772     snew(index[i],isize[i]);
 773     for(j=0; (j<isize[i]); j++)
 774       index[i][j]=grps->a[grps->index[gnr1]+j];
 775   }
 776 }
 777
 778 void rd_index(const char *statfile,int ngrps,int isize[],
 779               atom_id *index[],char *grpnames[])
 780 {
 781   char    **gnames;
 782   t_blocka *grps;
 783   int     *grpnr;
 784
 785   snew(grpnr,ngrps);
 786   if (!statfile)
 787     gmx_fatal(FARGS,"No index file specified");
 788   grps=init_index(statfile,&gnames);
 789   rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
 790 }
 791
 792 void rd_index_nrs(char *statfile,int ngrps,int isize[],
 793                   atom_id *index[],char *grpnames[],int grpnr[])
 794 {
 795   char    **gnames;
 796   t_blocka *grps;
 797
 798   if (!statfile)
 799     gmx_fatal(FARGS,"No index file specified");
 800   grps=init_index(statfile,&gnames);
 801
 802   rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
 803 }
 804
 805 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
 806                int isize[], atom_id *index[],char *grpnames[])
 807 {
 808   char    ***gnames;
 809   t_blocka *grps = NULL;
 810   int     *grpnr;
 811
 812   snew(grpnr,ngrps);
 813   snew(gnames,1);
 814   if (fnm != NULL) {
 815     grps=init_index(fnm,gnames);
 816   }
 817   else if (atoms) {
 818     snew(grps,1);
 819     snew(grps->index,1);
 820     analyse(atoms,grps,gnames,FALSE,FALSE);
 821   }
 822   else
 823     gmx_incons("You need to supply a valid atoms structure or a valid index file name");
 824
 825   rd_groups(grps,*gnames,grpnames,ngrps,isize,index,grpnr);
 826 }
 827
 828 t_cluster_ndx *cluster_index(FILE *fplog,const char *ndx)
 829 {
 830   t_cluster_ndx *c;
 831   int i;
 832
 833   snew(c,1);
 834   c->clust     = init_index(ndx,&c->grpname);
 835   c->maxframe = -1;
 836   for(i=0; (i<c->clust->nra); i++)
 837     c->maxframe = max(c->maxframe,c->clust->a[i]);
 838   fprintf(fplog ? fplog : stdout,
 839           "There are %d clusters containing %d structures, highest framenr is %d\n",
 840           c->clust->nr,c->clust->nra,c->maxframe);
 841   if (debug) {
 842     pr_blocka(debug,0,"clust",c->clust,TRUE);
 843     for(i=0; (i<c->clust->nra); i++)
 844       if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
 845         gmx_fatal(FARGS,"Range check error for c->clust->a[%d] = %d\n"
 846                   "should be within 0 and %d",i,c->clust->a[i],c->maxframe+1);
 847   }
 848   c->inv_clust=make_invblocka(c->clust,c->maxframe);
 849
 850   return c;
 851 }
 852