3 * This source code is part of
7 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * GROningen Mixture of Alchemy and Childrens' Stories
59 typedef enum { etOther
, etProt
, etDNA
, erestNR
} eRestp
;
60 static const char *ResTP
[erestNR
] = { "OTHER", "PROTEIN", "DNA" };
62 static const char *Sugars
[] = { "A", "T", "G", "C", "U" };
63 #define NDNA asize(Sugars)
65 static const char *Negate
[] = { "SOL" };
66 #define NNEGATE asize(Negate)
68 static bool gmx_ask_yesno(bool bASK
)
74 c
=toupper(fgetc(stdin
));
75 } while ((c
!= 'Y') && (c
!= 'N'));
83 t_blocka
*new_blocka(void)
93 void write_index(const char *outf
, t_blocka
*b
,char **gnames
)
98 out
=gmx_fio_fopen(outf
,"w");
99 /* fprintf(out,"%5d %5d\n",b->nr,b->nra); */
100 for(i
=0; (i
<b
->nr
); i
++) {
101 fprintf(out
,"[ %s ]\n",gnames
[i
]);
102 for(k
=0,j
=b
->index
[i
]; j
<b
->index
[i
+1]; j
++,k
++) {
103 fprintf(out
,"%4d ",b
->a
[j
]+1);
112 void add_grp(t_blocka
*b
,char ***gnames
,int nra
,atom_id a
[],const char *name
)
116 srenew(b
->index
,b
->nr
+2);
117 srenew(*gnames
,b
->nr
+1);
118 (*gnames
)[b
->nr
]=strdup(name
);
120 srenew(b
->a
,b
->nra
+nra
);
121 for(i
=0; (i
<nra
); i
++)
124 b
->index
[b
->nr
]=b
->nra
;
127 /* compare index in `a' with group in `b' at `index',
128 when `index'<0 it is relative to end of `b' */
129 static bool grp_cmp(t_blocka
*b
, int nra
, atom_id a
[], int index
)
134 index
= b
->nr
-1+index
;
136 gmx_fatal(FARGS
,"no such index group %d in t_blocka (nr=%d)",index
,b
->nr
);
138 if ( nra
!= b
->index
[index
+1] - b
->index
[index
] )
141 if ( a
[i
] != b
->a
[b
->index
[index
]+i
] )
146 static void p_status(int nres
,eRestp restp
[],bool bVerb
)
148 int i
,j
,ntp
[erestNR
];
150 for(i
=0; (i
<erestNR
); i
++)
152 for(j
=0; (j
<nres
); j
++)
156 for(i
=0; (i
<erestNR
); i
++)
157 printf("There are: %5d %10s residues\n",ntp
[i
],ResTP
[i
]);
160 atom_id
*mk_aid(t_atoms
*atoms
,eRestp restp
[],eRestp res
,int *nra
,
162 /* Make an array of atom_ids for all atoms with:
163 * (restp[i] == res) == bTrue
171 for(i
=0; (i
<atoms
->nr
); i
++)
172 if ((restp
[atoms
->atom
[i
].resind
] == res
) == bTrue
)
184 static void analyse_other(eRestp Restp
[],t_atoms
*atoms
,
185 t_blocka
*gb
,char ***gn
,bool bASK
,bool bVerb
)
190 atom_id
*other_ndx
,*aid
,*aaid
;
191 int i
,j
,k
,l
,resind
,naid
,naaid
,natp
,nrestp
=0;
193 for(i
=0; (i
<atoms
->nres
); i
++)
194 if (Restp
[i
] == etOther
)
196 if (i
< atoms
->nres
) {
199 printf("Analysing Other...\n");
200 snew(other_ndx
,atoms
->nr
);
201 for(k
=0; (k
<atoms
->nr
); k
++) {
202 resind
= atoms
->atom
[k
].resind
;
203 rname
= *atoms
->resinfo
[resind
].name
;
204 if (Restp
[resind
] == etOther
) {
205 for(l
=0; (l
<nrestp
); l
++)
206 if (strcmp(restp
[l
].rname
,rname
) == 0)
209 srenew(restp
,nrestp
+1);
210 restp
[nrestp
].rname
= strdup(rname
);
211 restp
[nrestp
].bNeg
= FALSE
;
212 restp
[nrestp
].gname
= strdup(rname
);
214 for(i
=0; i
<NNEGATE
; i
++) {
215 if (strcmp(rname
,Negate
[i
]) == 0) {
216 srenew(restp
,nrestp
+1);
217 restp
[nrestp
].rname
= strdup(rname
);
218 restp
[nrestp
].bNeg
= TRUE
;
219 snew(restp
[nrestp
].gname
,4+strlen(rname
)+1);
220 sprintf(restp
[nrestp
].gname
,"%s%s","non-",rname
);
227 for(i
=0; (i
<nrestp
); i
++) {
230 for(j
=0; (j
<atoms
->nr
); j
++) {
231 rname
= *atoms
->resinfo
[atoms
->atom
[j
].resind
].name
;
232 if ((strcmp(restp
[i
].rname
,rname
) == 0 && !restp
[i
].bNeg
) ||
233 (strcmp(restp
[i
].rname
,rname
) != 0 && restp
[i
].bNeg
)) {
237 add_grp(gb
,gn
,naid
,aid
,restp
[i
].gname
);
239 printf("split %s into atoms (y/n) ? ",restp
[i
].gname
);
241 if (gmx_ask_yesno(bASK
)) {
243 for(k
=0; (k
<naid
); k
++) {
244 aname
=*atoms
->atomname
[aid
[k
]];
245 for(l
=0; (l
<natp
); l
++)
246 if (strcmp(aname
,attp
[l
]) == 0)
254 for(l
=0; (l
<natp
); l
++) {
257 for(k
=0; (k
<naid
); k
++) {
258 aname
=*atoms
->atomname
[aid
[k
]];
259 if (strcmp(aname
,attp
[l
])==0)
260 aaid
[naaid
++]=aid
[k
];
262 add_grp(gb
,gn
,naaid
,aaid
,attp
[l
]);
276 static void analyse_prot(eRestp restp
[],t_atoms
*atoms
,
277 t_blocka
*gb
,char ***gn
,bool bASK
,bool bVerb
)
279 /* atomnames to be used in constructing index groups: */
280 static const char *pnoh
[] = { "H" };
281 static const char *pnodum
[] = { "MN1", "MN2", "MCB1", "MCB2", "MCG1", "MCG2",
282 "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2" };
283 static const char *calpha
[] = { "CA" };
284 static const char *bb
[] = { "N","CA","C" };
285 static const char *mc
[] = { "N","CA","C","O","O1","O2","OXT" };
286 static const char *mcb
[] = { "N","CA","CB","C","O","O1","O2","OT","OXT" };
287 static const char *mch
[] = { "N","CA","C","O","O1","O2","OT","OXT",
288 "H1","H2","H3","H" };
289 /* array of arrays of atomnames: */
290 static const char **chains
[] = { NULL
,pnoh
,calpha
,bb
,mc
,mcb
,mch
,mch
,mch
,pnodum
};
291 #define NCH asize(chains)
292 /* array of sizes of arrays of atomnames: */
293 const int sizes
[NCH
] = {
294 0, asize(pnoh
), asize(calpha
), asize(bb
),
295 asize(mc
), asize(mcb
), asize(mch
), asize(mch
), asize(mch
), asize(pnodum
)
297 /* descriptive names of index groups */
298 const char *ch_name
[NCH
] = {
299 "Protein", "Protein-H", "C-alpha", "Backbone",
300 "MainChain", "MainChain+Cb", "MainChain+H", "SideChain", "SideChain-H",
303 /* construct index group containing (TRUE) or excluding (FALSE)
305 const bool complement
[NCH
] = {
306 TRUE
, TRUE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, TRUE
, TRUE
, TRUE
308 const int wholename
[NCH
] = { -1, 0,-1,-1,-1,-1,-1,-1, 11,-1 };
309 /* the index in wholename gives the first item in the arrays of
310 * atomtypes that should be tested with 'strncasecmp' in stead of
311 * strcasecmp, or -1 if all items should be tested with strcasecmp
312 * This is comparable to using a '*' wildcard at the end of specific
313 * atom names, but that is more involved to implement...
315 /* only add index group if it differs from the specified one,
316 specify -1 to always add group */
317 const int compareto
[NCH
] = { -1,-1,-1,-1,-1,-1,-1,-1,-1, 0 };
321 int nra
,nnpres
,npres
;
323 char ndx_name
[STRLEN
],*atnm
;
327 printf("Analysing Protein...\n");
330 /* calculate the number of protein residues */
332 for(i
=0; (i
<atoms
->nres
); i
++)
333 if (restp
[i
] == etProt
)
336 /* find matching or complement atoms */
337 for(i
=0; (i
<(int)NCH
); i
++) {
339 for(n
=0; (n
<atoms
->nr
); n
++) {
340 if (restp
[atoms
->atom
[n
].resind
] == etProt
) {
342 for(j
=0; (j
<sizes
[i
]); j
++) {
343 /* skip digits at beginning of atomname, e.g. 1H */
344 atnm
=*atoms
->atomname
[n
];
345 while (isdigit(atnm
[0]))
347 if ( (wholename
[i
]==-1) || (j
<wholename
[i
]) ) {
348 if (strcasecmp(chains
[i
][j
],atnm
) == 0)
351 if (strncasecmp(chains
[i
][j
],atnm
,strlen(chains
[i
][j
])) == 0)
355 if (match
!= complement
[i
])
359 /* if we want to add this group always or it differs from previous
361 if ( compareto
[i
] == -1 || !grp_cmp(gb
,nra
,aid
,compareto
[i
]-i
) )
362 add_grp(gb
,gn
,nra
,aid
,ch_name
[i
]);
366 for(i
=0; (i
<(int)NCH
); i
++) {
367 printf("Split %12s into %5d residues (y/n) ? ",ch_name
[i
],npres
);
368 if (gmx_ask_yesno(bASK
)) {
371 for(n
=0;((atoms
->atom
[n
].resind
< npres
) && (n
<atoms
->nr
));) {
372 resind
= atoms
->atom
[n
].resind
;
373 for(;((atoms
->atom
[n
].resind
==resind
) && (n
<atoms
->nr
));n
++) {
375 for(j
=0;(j
<sizes
[i
]); j
++)
376 if (strcasecmp(chains
[i
][j
],*atoms
->atomname
[n
]) == 0)
378 if (match
!= complement
[i
])
381 /* copy the residuename to the tail of the groupname */
384 ri
= &atoms
->resinfo
[resind
];
385 sprintf(ndx_name
,"%s_%s%d%c",
386 ch_name
[i
],*ri
->name
,ri
->nr
,ri
->ic
==' ' ? '\0' : ri
->ic
);
387 add_grp(gb
,gn
,nra
,aid
,ndx_name
);
393 printf("Make group with sidechain and C=O swapped (y/n) ? ");
394 if (gmx_ask_yesno(bASK
)) {
395 /* Make swap sidechain C=O index */
398 for(n
=0;((atoms
->atom
[n
].resind
< npres
) && (n
<atoms
->nr
));) {
399 resind
= atoms
->atom
[n
].resind
;
401 for(;((atoms
->atom
[n
].resind
==resind
) && (n
<atoms
->nr
));n
++)
402 if (strcmp("CA",*atoms
->atomname
[n
]) == 0) {
406 } else if (strcmp("C",*atoms
->atomname
[n
]) == 0) {
408 gmx_incons("Atom naming problem");
410 } else if (strcmp("O",*atoms
->atomname
[n
]) == 0) {
412 gmx_incons("Atom naming problem");
414 } else if (strcmp("O1",*atoms
->atomname
[n
]) == 0) {
416 gmx_incons("Atom naming problem");
421 /* copy the residuename to the tail of the groupname */
423 add_grp(gb
,gn
,nra
,aid
,"SwapSC-CO");
431 static void analyse_dna(eRestp restp
[],t_atoms
*atoms
,
432 t_blocka
*gb
,char ***gn
,bool bASK
,bool bVerb
)
435 printf("Analysing DNA... (not really)\n");
437 printf("eRestp %p; atoms %p; gb %p; gn %p; bASK %s; bASK %s",
438 (void *)restp
, (void *)atoms
, (void *)gb
, (void *)gn
,
439 bool_names
[bASK
], bool_names
[bVerb
]);
442 t_aa_names
*get_aa_names(void)
444 /* Read the database in aminoacids.dat */
449 aan
->n
= get_strings("aminoacids.dat",&(aan
->aa
));
450 /* qsort(aan->aa,aan->n,sizeof(aan->aa[0]),strcmp);*/
455 bool is_protein(t_aa_names
*aan
,char *resnm
)
457 /* gives true if resnm occurs in aminoacids.dat */
460 for(i
=0; (i
<aan
->n
); i
++)
461 if (strcasecmp(aan
->aa
[i
],resnm
) == 0)
467 void done_aa_names(t_aa_names
**aan
)
472 for(i
=0; (i
<(*aan
)->n
); i
++)
473 sfree((*aan
)->aa
[i
]);
479 void analyse(t_atoms
*atoms
,t_blocka
*gb
,char ***gn
,bool bASK
,bool bVerb
)
490 printf("Analysing residue names:\n");
491 snew(restp
,atoms
->nres
);
492 aid
=mk_aid(atoms
,restp
,etOther
,&nra
,TRUE
);
493 add_grp(gb
,gn
,nra
,aid
,"System");
496 aan
= get_aa_names();
497 for(i
=0; (i
<atoms
->nres
); i
++) {
498 resnm
= *atoms
->resinfo
[i
].name
;
499 if ((restp
[i
] == etOther
) && is_protein(aan
,resnm
))
501 if (restp
[i
] == etOther
)
502 for(j
=0; (j
<NDNA
); j
++) {
503 if (strcasecmp(Sugars
[j
],resnm
) == 0)
507 p_status(atoms
->nres
,restp
,bVerb
);
511 aid
=mk_aid(atoms
,restp
,etProt
,&nra
,TRUE
);
513 analyse_prot(restp
,atoms
,gb
,gn
,bASK
,bVerb
);
518 aid
=mk_aid(atoms
,restp
,etProt
,&nra
,FALSE
);
519 if ((nra
> 0) && (nra
< atoms
->nr
))
520 add_grp(gb
,gn
,nra
,aid
,"non-Protein");
524 aid
=mk_aid(atoms
,restp
,etDNA
,&nra
,TRUE
);
526 add_grp(gb
,gn
,nra
,aid
,"DNA");
527 analyse_dna(restp
,atoms
,gb
,gn
,bASK
,bVerb
);
532 analyse_other(restp
,atoms
,gb
,gn
,bASK
,bVerb
);
537 void check_index(char *gname
,int n
,atom_id index
[],char *traj
,int natoms
)
542 if (index
[i
] >= natoms
)
543 gmx_fatal(FARGS
,"%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
544 gname
? gname
: "Index",i
+1, index
[i
]+1,
545 traj
? traj
: "the trajectory",natoms
);
546 else if (index
[i
] < 0)
547 gmx_fatal(FARGS
,"%s atom number (index[%d]=%d) is less than zero",
548 gname
? gname
: "Index",i
+1, index
[i
]+1);
551 t_blocka
*init_index(const char *gfile
, char ***grpname
)
557 char line
[STRLEN
],*pt
,str
[STRLEN
];
559 in
=gmx_fio_fopen(gfile
,"r");
561 get_a_line(in
,line
,STRLEN
);
562 if ( line
[0]=='[' ) {
571 if (get_header(line
,str
)) {
573 srenew(b
->index
,b
->nr
+1);
574 srenew(*grpname
,b
->nr
);
577 b
->index
[b
->nr
]=b
->index
[b
->nr
-1];
578 (*grpname
)[b
->nr
-1]=strdup(str
);
581 while ((i
=sscanf(pt
,"%s",str
)) == 1) {
585 srenew(b
->a
,maxentries
);
587 b
->a
[i
]=strtol(str
, NULL
, 10)-1;
590 pt
=strstr(pt
,str
)+strlen(str
);
593 } while (get_a_line(in
,line
,STRLEN
));
597 sscanf(line
,"%d%d",&b
->nr
,&b
->nra
);
598 snew(b
->index
,b
->nr
+1);
599 snew(*grpname
,b
->nr
);
602 for (i
=0; (i
<b
->nr
); i
++) {
603 nread
=fscanf(in
,"%s%d",str
,&ng
);
604 (*grpname
)[i
]=strdup(str
);
605 b
->index
[i
+1]=b
->index
[i
]+ng
;
606 if (b
->index
[i
+1] > b
->nra
)
607 gmx_fatal(FARGS
,"Something wrong in your indexfile at group %s",str
);
608 for(j
=0; (j
<ng
); j
++) {
609 nread
=fscanf(in
,"%d",&a
);
610 b
->a
[b
->index
[i
]+j
]=a
;
616 for(i
=0; (i
<b
->nr
); i
++) {
617 for(j
=b
->index
[i
]; (j
<b
->index
[i
+1]); j
++) {
619 fprintf(stderr
,"\nWARNING: negative index %d in group %s\n\n",
620 b
->a
[j
],(*grpname
)[i
]);
627 static void minstring(char *str
)
631 for (i
=0; (i
< (int)strlen(str
)); i
++)
636 int find_group(char s
[], int ngrps
, char **grpname
)
645 /* first look for whole name match */
647 for(i
=0; i
<ngrps
; i
++)
648 if (strcasecmp_min(s
,grpname
[i
])==0) {
653 /* second look for first string match */
655 for(i
=0; i
<ngrps
; i
++)
656 if (strncasecmp_min(s
,grpname
[i
],n
)==0) {
661 /* last look for arbitrary substring match */
665 for(i
=0; i
<ngrps
; i
++) {
666 strcpy(string
, grpname
[i
]);
669 if (strstr(string
,s
)!=NULL
) {
677 printf("Error: Multiple groups '%s' selected\n", s
);
683 static int qgroup(int *a
, int ngrps
, char **grpname
)
691 fprintf(stderr
,"Select a group: ");
693 if ( scanf("%s",s
)!=1 )
694 gmx_fatal(FARGS
,"Cannot read from input");
695 trim(s
); /* remove spaces */
696 } while (strlen(s
)==0);
697 aa
= strtol(s
, &end
, 10);
698 if (aa
==0 && end
[0] != '\0') /* string entered */
699 aa
= find_group(s
, ngrps
, grpname
);
700 bInRange
= (aa
>= 0 && aa
< ngrps
);
702 printf("Error: No such group '%s'\n", s
);
704 printf("Selected %d: '%s'\n", aa
, grpname
[aa
]);
709 static void rd_groups(t_blocka
*grps
,char **grpname
,char *gnames
[],
710 int ngrps
,int isize
[],atom_id
*index
[],int grpnr
[])
715 gmx_fatal(FARGS
,"Error: no groups in indexfile");
716 for(i
=0; (i
<grps
->nr
); i
++)
717 fprintf(stderr
,"Group %5d (%12s) has %5d elements\n",i
,grpname
[i
],
718 grps
->index
[i
+1]-grps
->index
[i
]);
719 for(i
=0; (i
<ngrps
); i
++) {
722 gnr1
=qgroup(&grpnr
[i
], grps
->nr
, grpname
);
723 if ((gnr1
<0) || (gnr1
>=grps
->nr
))
724 fprintf(stderr
,"Select between %d and %d.\n",0,grps
->nr
-1);
725 } while ((gnr1
<0) || (gnr1
>=grps
->nr
));
727 fprintf(stderr
,"There is one group in the index\n");
730 gnames
[i
]=strdup(grpname
[gnr1
]);
731 isize
[i
]=grps
->index
[gnr1
+1]-grps
->index
[gnr1
];
732 snew(index
[i
],isize
[i
]);
733 for(j
=0; (j
<isize
[i
]); j
++)
734 index
[i
][j
]=grps
->a
[grps
->index
[gnr1
]+j
];
738 void rd_index(const char *statfile
,int ngrps
,int isize
[],
739 atom_id
*index
[],char *grpnames
[])
747 gmx_fatal(FARGS
,"No index file specified");
748 grps
=init_index(statfile
,&gnames
);
749 rd_groups(grps
,gnames
,grpnames
,ngrps
,isize
,index
,grpnr
);
752 void rd_index_nrs(char *statfile
,int ngrps
,int isize
[],
753 atom_id
*index
[],char *grpnames
[],int grpnr
[])
759 gmx_fatal(FARGS
,"No index file specified");
760 grps
=init_index(statfile
,&gnames
);
762 rd_groups(grps
,gnames
,grpnames
,ngrps
,isize
,index
,grpnr
);
765 void get_index(t_atoms
*atoms
, const char *fnm
, int ngrps
,
766 int isize
[], atom_id
*index
[],char *grpnames
[])
769 t_blocka
*grps
= NULL
;
775 grps
=init_index(fnm
,gnames
);
780 analyse(atoms
,grps
,gnames
,FALSE
,FALSE
);
783 gmx_incons("You need to supply a valid atoms structure or a valid index file name");
785 rd_groups(grps
,*gnames
,grpnames
,ngrps
,isize
,index
,grpnr
);
788 t_cluster_ndx
*cluster_index(FILE *fplog
,const char *ndx
)
794 c
->clust
= init_index(ndx
,&c
->grpname
);
796 for(i
=0; (i
<c
->clust
->nra
); i
++)
797 c
->maxframe
= max(c
->maxframe
,c
->clust
->a
[i
]);
798 fprintf(fplog
? fplog
: stdout
,
799 "There are %d clusters containing %d structures, highest framenr is %d\n",
800 c
->clust
->nr
,c
->clust
->nra
,c
->maxframe
);
802 pr_blocka(debug
,0,"clust",c
->clust
,TRUE
);
803 for(i
=0; (i
<c
->clust
->nra
); i
++)
804 if ((c
->clust
->a
[i
] < 0) || (c
->clust
->a
[i
] > c
->maxframe
))
805 gmx_fatal(FARGS
,"Range check error for c->clust->a[%d] = %d\n"
806 "should be within 0 and %d",i
,c
->clust
->a
[i
],c
->maxframe
+1);
808 c
->inv_clust
=make_invblocka(c
->clust
,c
->maxframe
);