3 * This source code is part of
7 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * GROningen Mixture of Alchemy and Childrens' Stories
59 typedef enum { etOther
, etProt
, etDNA
, etRNA
, erestNR
} eRestp
;
60 static const char *ResTP
[erestNR
] = { "OTHER", "PROTEIN", "DNA", "RNA" };
62 static const char *DNA
[] = { "DA", "DT", "DG", "DC", "DU" };
63 #define NDNA asize(DNA)
65 static const char *RNA
[] = { "A", "T", "G", "C", "U" };
66 #define NRNA asize(RNA)
68 static const char *Negate
[] = { "SOL" };
69 #define NNEGATE asize(Negate)
71 static bool gmx_ask_yesno(bool bASK
)
77 c
=toupper(fgetc(stdin
));
78 } while ((c
!= 'Y') && (c
!= 'N'));
86 t_blocka
*new_blocka(void)
96 void write_index(const char *outf
, t_blocka
*b
,char **gnames
)
101 out
=gmx_fio_fopen(outf
,"w");
102 /* fprintf(out,"%5d %5d\n",b->nr,b->nra); */
103 for(i
=0; (i
<b
->nr
); i
++) {
104 fprintf(out
,"[ %s ]\n",gnames
[i
]);
105 for(k
=0,j
=b
->index
[i
]; j
<b
->index
[i
+1]; j
++,k
++) {
106 fprintf(out
,"%4d ",b
->a
[j
]+1);
115 void add_grp(t_blocka
*b
,char ***gnames
,int nra
,atom_id a
[],const char *name
)
119 srenew(b
->index
,b
->nr
+2);
120 srenew(*gnames
,b
->nr
+1);
121 (*gnames
)[b
->nr
]=strdup(name
);
123 srenew(b
->a
,b
->nra
+nra
);
124 for(i
=0; (i
<nra
); i
++)
127 b
->index
[b
->nr
]=b
->nra
;
130 /* compare index in `a' with group in `b' at `index',
131 when `index'<0 it is relative to end of `b' */
132 static bool grp_cmp(t_blocka
*b
, int nra
, atom_id a
[], int index
)
137 index
= b
->nr
-1+index
;
139 gmx_fatal(FARGS
,"no such index group %d in t_blocka (nr=%d)",index
,b
->nr
);
141 if ( nra
!= b
->index
[index
+1] - b
->index
[index
] )
144 if ( a
[i
] != b
->a
[b
->index
[index
]+i
] )
149 static void p_status(int nres
,eRestp restp
[],bool bVerb
)
151 int i
,j
,ntp
[erestNR
];
153 for(i
=0; (i
<erestNR
); i
++)
155 for(j
=0; (j
<nres
); j
++)
159 for(i
=0; (i
<erestNR
); i
++)
160 printf("There are: %5d %10s residues\n",ntp
[i
],ResTP
[i
]);
163 atom_id
*mk_aid(t_atoms
*atoms
,eRestp restp
[],eRestp res
,int *nra
,
165 /* Make an array of atom_ids for all atoms with:
166 * (restp[i] == res) == bTrue
174 for(i
=0; (i
<atoms
->nr
); i
++)
175 if ((restp
[atoms
->atom
[i
].resind
] == res
) == bTrue
)
187 static void analyse_other(eRestp Restp
[],t_atoms
*atoms
,
188 t_blocka
*gb
,char ***gn
,bool bASK
,bool bVerb
)
193 atom_id
*other_ndx
,*aid
,*aaid
;
194 int i
,j
,k
,l
,resind
,naid
,naaid
,natp
,nrestp
=0;
196 for(i
=0; (i
<atoms
->nres
); i
++)
197 if (Restp
[i
] == etOther
)
199 if (i
< atoms
->nres
) {
202 printf("Analysing Other...\n");
203 snew(other_ndx
,atoms
->nr
);
204 for(k
=0; (k
<atoms
->nr
); k
++) {
205 resind
= atoms
->atom
[k
].resind
;
206 rname
= *atoms
->resinfo
[resind
].name
;
207 if (Restp
[resind
] == etOther
) {
208 for(l
=0; (l
<nrestp
); l
++)
209 if (strcmp(restp
[l
].rname
,rname
) == 0)
212 srenew(restp
,nrestp
+1);
213 restp
[nrestp
].rname
= strdup(rname
);
214 restp
[nrestp
].bNeg
= FALSE
;
215 restp
[nrestp
].gname
= strdup(rname
);
217 for(i
=0; i
<NNEGATE
; i
++) {
218 if (strcmp(rname
,Negate
[i
]) == 0) {
219 srenew(restp
,nrestp
+1);
220 restp
[nrestp
].rname
= strdup(rname
);
221 restp
[nrestp
].bNeg
= TRUE
;
222 snew(restp
[nrestp
].gname
,4+strlen(rname
)+1);
223 sprintf(restp
[nrestp
].gname
,"%s%s","non-",rname
);
230 for(i
=0; (i
<nrestp
); i
++) {
233 for(j
=0; (j
<atoms
->nr
); j
++) {
234 rname
= *atoms
->resinfo
[atoms
->atom
[j
].resind
].name
;
235 if ((strcmp(restp
[i
].rname
,rname
) == 0 && !restp
[i
].bNeg
) ||
236 (strcmp(restp
[i
].rname
,rname
) != 0 && restp
[i
].bNeg
)) {
240 add_grp(gb
,gn
,naid
,aid
,restp
[i
].gname
);
242 printf("split %s into atoms (y/n) ? ",restp
[i
].gname
);
244 if (gmx_ask_yesno(bASK
)) {
246 for(k
=0; (k
<naid
); k
++) {
247 aname
=*atoms
->atomname
[aid
[k
]];
248 for(l
=0; (l
<natp
); l
++)
249 if (strcmp(aname
,attp
[l
]) == 0)
257 for(l
=0; (l
<natp
); l
++) {
260 for(k
=0; (k
<naid
); k
++) {
261 aname
=*atoms
->atomname
[aid
[k
]];
262 if (strcmp(aname
,attp
[l
])==0)
263 aaid
[naaid
++]=aid
[k
];
265 add_grp(gb
,gn
,naaid
,aaid
,attp
[l
]);
279 static void analyse_prot(eRestp restp
[],t_atoms
*atoms
,
280 t_blocka
*gb
,char ***gn
,bool bASK
,bool bVerb
)
282 /* atomnames to be used in constructing index groups: */
283 static const char *pnoh
[] = { "H" };
284 static const char *pnodum
[] = { "MN1", "MN2", "MCB1", "MCB2", "MCG1", "MCG2",
285 "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2" };
286 static const char *calpha
[] = { "CA" };
287 static const char *bb
[] = { "N","CA","C" };
288 static const char *mc
[] = { "N","CA","C","O","O1","O2","OXT" };
289 static const char *mcb
[] = { "N","CA","CB","C","O","O1","O2","OT","OXT" };
290 static const char *mch
[] = { "N","CA","C","O","O1","O2","OT","OXT",
291 "H1","H2","H3","H" };
292 /* array of arrays of atomnames: */
293 static const char **chains
[] = { NULL
,pnoh
,calpha
,bb
,mc
,mcb
,mch
,mch
,mch
,pnodum
};
294 #define NCH asize(chains)
295 /* array of sizes of arrays of atomnames: */
296 const int sizes
[NCH
] = {
297 0, asize(pnoh
), asize(calpha
), asize(bb
),
298 asize(mc
), asize(mcb
), asize(mch
), asize(mch
), asize(mch
), asize(pnodum
)
300 /* descriptive names of index groups */
301 const char *ch_name
[NCH
] = {
302 "Protein", "Protein-H", "C-alpha", "Backbone",
303 "MainChain", "MainChain+Cb", "MainChain+H", "SideChain", "SideChain-H",
306 /* construct index group containing (TRUE) or excluding (FALSE)
308 const bool complement
[NCH
] = {
309 TRUE
, TRUE
, FALSE
, FALSE
, FALSE
, FALSE
, FALSE
, TRUE
, TRUE
, TRUE
311 const int wholename
[NCH
] = { -1, 0,-1,-1,-1,-1,-1,-1, 11,-1 };
312 /* the index in wholename gives the first item in the arrays of
313 * atomtypes that should be tested with 'strncasecmp' in stead of
314 * strcasecmp, or -1 if all items should be tested with strcasecmp
315 * This is comparable to using a '*' wildcard at the end of specific
316 * atom names, but that is more involved to implement...
318 /* only add index group if it differs from the specified one,
319 specify -1 to always add group */
320 const int compareto
[NCH
] = { -1,-1,-1,-1,-1,-1,-1,-1,-1, 0 };
324 int nra
,nnpres
,npres
;
326 char ndx_name
[STRLEN
],*atnm
;
330 printf("Analysing Protein...\n");
333 /* calculate the number of protein residues */
335 for(i
=0; (i
<atoms
->nres
); i
++)
336 if (restp
[i
] == etProt
)
339 /* find matching or complement atoms */
340 for(i
=0; (i
<(int)NCH
); i
++) {
342 for(n
=0; (n
<atoms
->nr
); n
++) {
343 if (restp
[atoms
->atom
[n
].resind
] == etProt
) {
345 for(j
=0; (j
<sizes
[i
]); j
++) {
346 /* skip digits at beginning of atomname, e.g. 1H */
347 atnm
=*atoms
->atomname
[n
];
348 while (isdigit(atnm
[0]))
350 if ( (wholename
[i
]==-1) || (j
<wholename
[i
]) ) {
351 if (strcasecmp(chains
[i
][j
],atnm
) == 0)
354 if (strncasecmp(chains
[i
][j
],atnm
,strlen(chains
[i
][j
])) == 0)
358 if (match
!= complement
[i
])
362 /* if we want to add this group always or it differs from previous
364 if ( compareto
[i
] == -1 || !grp_cmp(gb
,nra
,aid
,compareto
[i
]-i
) )
365 add_grp(gb
,gn
,nra
,aid
,ch_name
[i
]);
369 for(i
=0; (i
<(int)NCH
); i
++) {
370 printf("Split %12s into %5d residues (y/n) ? ",ch_name
[i
],npres
);
371 if (gmx_ask_yesno(bASK
)) {
374 for(n
=0;((atoms
->atom
[n
].resind
< npres
) && (n
<atoms
->nr
));) {
375 resind
= atoms
->atom
[n
].resind
;
376 for(;((atoms
->atom
[n
].resind
==resind
) && (n
<atoms
->nr
));n
++) {
378 for(j
=0;(j
<sizes
[i
]); j
++)
379 if (strcasecmp(chains
[i
][j
],*atoms
->atomname
[n
]) == 0)
381 if (match
!= complement
[i
])
384 /* copy the residuename to the tail of the groupname */
387 ri
= &atoms
->resinfo
[resind
];
388 sprintf(ndx_name
,"%s_%s%d%c",
389 ch_name
[i
],*ri
->name
,ri
->nr
,ri
->ic
==' ' ? '\0' : ri
->ic
);
390 add_grp(gb
,gn
,nra
,aid
,ndx_name
);
396 printf("Make group with sidechain and C=O swapped (y/n) ? ");
397 if (gmx_ask_yesno(bASK
)) {
398 /* Make swap sidechain C=O index */
401 for(n
=0;((atoms
->atom
[n
].resind
< npres
) && (n
<atoms
->nr
));) {
402 resind
= atoms
->atom
[n
].resind
;
404 for(;((atoms
->atom
[n
].resind
==resind
) && (n
<atoms
->nr
));n
++)
405 if (strcmp("CA",*atoms
->atomname
[n
]) == 0) {
409 } else if (strcmp("C",*atoms
->atomname
[n
]) == 0) {
411 gmx_incons("Atom naming problem");
413 } else if (strcmp("O",*atoms
->atomname
[n
]) == 0) {
415 gmx_incons("Atom naming problem");
417 } else if (strcmp("O1",*atoms
->atomname
[n
]) == 0) {
419 gmx_incons("Atom naming problem");
424 /* copy the residuename to the tail of the groupname */
426 add_grp(gb
,gn
,nra
,aid
,"SwapSC-CO");
434 static void analyse_dna(eRestp restp
[],t_atoms
*atoms
,
435 t_blocka
*gb
,char ***gn
,bool bASK
,bool bVerb
)
438 printf("Analysing DNA... (not really)\n");
440 printf("eRestp %p; atoms %p; gb %p; gn %p; bASK %s; bASK %s",
441 (void *)restp
, (void *)atoms
, (void *)gb
, (void *)gn
,
442 bool_names
[bASK
], bool_names
[bVerb
]);
445 t_aa_names
*get_aa_names(void)
447 /* Read the database in aminoacids.dat */
452 aan
->n
= get_strings("aminoacids.dat",&(aan
->aa
));
453 /* qsort(aan->aa,aan->n,sizeof(aan->aa[0]),strcmp);*/
458 bool is_residue(t_aa_names
*aan
,char *resnm
)
460 /* gives true if resnm occurs in aminoacids.dat */
463 for(i
=0; i
<aan
->n
; i
++) {
464 if (strcasecmp(aan
->aa
[i
],resnm
) == 0) {
468 for(i
=0; i
<NDNA
; i
++) {
469 if (strcasecmp(DNA
[i
],resnm
) == 0) {
473 for(i
=0; i
<NRNA
; i
++) {
474 if (strcasecmp(RNA
[i
],resnm
) == 0) {
482 bool is_protein(t_aa_names
*aan
,char *resnm
)
484 /* gives true if resnm occurs in aminoacids.dat */
487 for(i
=0; (i
<aan
->n
); i
++)
488 if (strcasecmp(aan
->aa
[i
],resnm
) == 0)
494 void done_aa_names(t_aa_names
**aan
)
499 for(i
=0; (i
<(*aan
)->n
); i
++)
500 sfree((*aan
)->aa
[i
]);
506 void analyse(t_atoms
*atoms
,t_blocka
*gb
,char ***gn
,bool bASK
,bool bVerb
)
517 printf("Analysing residue names:\n");
518 snew(restp
,atoms
->nres
);
519 aid
=mk_aid(atoms
,restp
,etOther
,&nra
,TRUE
);
520 add_grp(gb
,gn
,nra
,aid
,"System");
523 aan
= get_aa_names();
524 for(i
=0; (i
<atoms
->nres
); i
++) {
525 resnm
= *atoms
->resinfo
[i
].name
;
526 if ((restp
[i
] == etOther
) && is_protein(aan
,resnm
))
528 if (restp
[i
] == etOther
) {
529 for(j
=0; (j
<NDNA
); j
++) {
530 if (strcasecmp(DNA
[j
],resnm
) == 0)
533 for(j
=0; (j
<NRNA
); j
++) {
534 if (strcasecmp(RNA
[j
],resnm
) == 0)
539 p_status(atoms
->nres
,restp
,bVerb
);
543 aid
=mk_aid(atoms
,restp
,etProt
,&nra
,TRUE
);
545 analyse_prot(restp
,atoms
,gb
,gn
,bASK
,bVerb
);
550 aid
=mk_aid(atoms
,restp
,etProt
,&nra
,FALSE
);
551 if ((nra
> 0) && (nra
< atoms
->nr
))
552 add_grp(gb
,gn
,nra
,aid
,"non-Protein");
556 aid
=mk_aid(atoms
,restp
,etDNA
,&nra
,TRUE
);
558 add_grp(gb
,gn
,nra
,aid
,"DNA");
559 analyse_dna(restp
,atoms
,gb
,gn
,bASK
,bVerb
);
564 aid
=mk_aid(atoms
,restp
,etRNA
,&nra
,TRUE
);
566 add_grp(gb
,gn
,nra
,aid
,"RNA");
567 analyse_dna(restp
,atoms
,gb
,gn
,bASK
,bVerb
);
572 analyse_other(restp
,atoms
,gb
,gn
,bASK
,bVerb
);
577 void check_index(char *gname
,int n
,atom_id index
[],char *traj
,int natoms
)
582 if (index
[i
] >= natoms
)
583 gmx_fatal(FARGS
,"%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
584 gname
? gname
: "Index",i
+1, index
[i
]+1,
585 traj
? traj
: "the trajectory",natoms
);
586 else if (index
[i
] < 0)
587 gmx_fatal(FARGS
,"%s atom number (index[%d]=%d) is less than zero",
588 gname
? gname
: "Index",i
+1, index
[i
]+1);
591 t_blocka
*init_index(const char *gfile
, char ***grpname
)
597 char line
[STRLEN
],*pt
,str
[STRLEN
];
599 in
=gmx_fio_fopen(gfile
,"r");
601 get_a_line(in
,line
,STRLEN
);
602 if ( line
[0]=='[' ) {
611 if (get_header(line
,str
)) {
613 srenew(b
->index
,b
->nr
+1);
614 srenew(*grpname
,b
->nr
);
617 b
->index
[b
->nr
]=b
->index
[b
->nr
-1];
618 (*grpname
)[b
->nr
-1]=strdup(str
);
621 while ((i
=sscanf(pt
,"%s",str
)) == 1) {
625 srenew(b
->a
,maxentries
);
627 b
->a
[i
]=strtol(str
, NULL
, 10)-1;
630 pt
=strstr(pt
,str
)+strlen(str
);
633 } while (get_a_line(in
,line
,STRLEN
));
637 sscanf(line
,"%d%d",&b
->nr
,&b
->nra
);
638 snew(b
->index
,b
->nr
+1);
639 snew(*grpname
,b
->nr
);
642 for (i
=0; (i
<b
->nr
); i
++) {
643 nread
=fscanf(in
,"%s%d",str
,&ng
);
644 (*grpname
)[i
]=strdup(str
);
645 b
->index
[i
+1]=b
->index
[i
]+ng
;
646 if (b
->index
[i
+1] > b
->nra
)
647 gmx_fatal(FARGS
,"Something wrong in your indexfile at group %s",str
);
648 for(j
=0; (j
<ng
); j
++) {
649 nread
=fscanf(in
,"%d",&a
);
650 b
->a
[b
->index
[i
]+j
]=a
;
656 for(i
=0; (i
<b
->nr
); i
++) {
657 for(j
=b
->index
[i
]; (j
<b
->index
[i
+1]); j
++) {
659 fprintf(stderr
,"\nWARNING: negative index %d in group %s\n\n",
660 b
->a
[j
],(*grpname
)[i
]);
667 static void minstring(char *str
)
671 for (i
=0; (i
< (int)strlen(str
)); i
++)
676 int find_group(char s
[], int ngrps
, char **grpname
)
685 /* first look for whole name match */
687 for(i
=0; i
<ngrps
; i
++)
688 if (strcasecmp_min(s
,grpname
[i
])==0) {
693 /* second look for first string match */
695 for(i
=0; i
<ngrps
; i
++)
696 if (strncasecmp_min(s
,grpname
[i
],n
)==0) {
701 /* last look for arbitrary substring match */
705 for(i
=0; i
<ngrps
; i
++) {
706 strcpy(string
, grpname
[i
]);
709 if (strstr(string
,s
)!=NULL
) {
717 printf("Error: Multiple groups '%s' selected\n", s
);
723 static int qgroup(int *a
, int ngrps
, char **grpname
)
731 fprintf(stderr
,"Select a group: ");
733 if ( scanf("%s",s
)!=1 )
734 gmx_fatal(FARGS
,"Cannot read from input");
735 trim(s
); /* remove spaces */
736 } while (strlen(s
)==0);
737 aa
= strtol(s
, &end
, 10);
738 if (aa
==0 && end
[0] != '\0') /* string entered */
739 aa
= find_group(s
, ngrps
, grpname
);
740 bInRange
= (aa
>= 0 && aa
< ngrps
);
742 printf("Error: No such group '%s'\n", s
);
744 printf("Selected %d: '%s'\n", aa
, grpname
[aa
]);
749 static void rd_groups(t_blocka
*grps
,char **grpname
,char *gnames
[],
750 int ngrps
,int isize
[],atom_id
*index
[],int grpnr
[])
755 gmx_fatal(FARGS
,"Error: no groups in indexfile");
756 for(i
=0; (i
<grps
->nr
); i
++)
757 fprintf(stderr
,"Group %5d (%12s) has %5d elements\n",i
,grpname
[i
],
758 grps
->index
[i
+1]-grps
->index
[i
]);
759 for(i
=0; (i
<ngrps
); i
++) {
762 gnr1
=qgroup(&grpnr
[i
], grps
->nr
, grpname
);
763 if ((gnr1
<0) || (gnr1
>=grps
->nr
))
764 fprintf(stderr
,"Select between %d and %d.\n",0,grps
->nr
-1);
765 } while ((gnr1
<0) || (gnr1
>=grps
->nr
));
767 fprintf(stderr
,"There is one group in the index\n");
770 gnames
[i
]=strdup(grpname
[gnr1
]);
771 isize
[i
]=grps
->index
[gnr1
+1]-grps
->index
[gnr1
];
772 snew(index
[i
],isize
[i
]);
773 for(j
=0; (j
<isize
[i
]); j
++)
774 index
[i
][j
]=grps
->a
[grps
->index
[gnr1
]+j
];
778 void rd_index(const char *statfile
,int ngrps
,int isize
[],
779 atom_id
*index
[],char *grpnames
[])
787 gmx_fatal(FARGS
,"No index file specified");
788 grps
=init_index(statfile
,&gnames
);
789 rd_groups(grps
,gnames
,grpnames
,ngrps
,isize
,index
,grpnr
);
792 void rd_index_nrs(char *statfile
,int ngrps
,int isize
[],
793 atom_id
*index
[],char *grpnames
[],int grpnr
[])
799 gmx_fatal(FARGS
,"No index file specified");
800 grps
=init_index(statfile
,&gnames
);
802 rd_groups(grps
,gnames
,grpnames
,ngrps
,isize
,index
,grpnr
);
805 void get_index(t_atoms
*atoms
, const char *fnm
, int ngrps
,
806 int isize
[], atom_id
*index
[],char *grpnames
[])
809 t_blocka
*grps
= NULL
;
815 grps
=init_index(fnm
,gnames
);
820 analyse(atoms
,grps
,gnames
,FALSE
,FALSE
);
823 gmx_incons("You need to supply a valid atoms structure or a valid index file name");
825 rd_groups(grps
,*gnames
,grpnames
,ngrps
,isize
,index
,grpnr
);
828 t_cluster_ndx
*cluster_index(FILE *fplog
,const char *ndx
)
834 c
->clust
= init_index(ndx
,&c
->grpname
);
836 for(i
=0; (i
<c
->clust
->nra
); i
++)
837 c
->maxframe
= max(c
->maxframe
,c
->clust
->a
[i
]);
838 fprintf(fplog
? fplog
: stdout
,
839 "There are %d clusters containing %d structures, highest framenr is %d\n",
840 c
->clust
->nr
,c
->clust
->nra
,c
->maxframe
);
842 pr_blocka(debug
,0,"clust",c
->clust
,TRUE
);
843 for(i
=0; (i
<c
->clust
->nra
); i
++)
844 if ((c
->clust
->a
[i
] < 0) || (c
->clust
->a
[i
] > c
->maxframe
))
845 gmx_fatal(FARGS
,"Range check error for c->clust->a[%d] = %d\n"
846 "should be within 0 and %d",i
,c
->clust
->a
[i
],c
->maxframe
+1);
848 c
->inv_clust
=make_invblocka(c
->clust
,c
->maxframe
);