changed reading hint
[gromacs/adressmacs.git] / src / kernel / pdb2gmx.c
blob03d8962f6825dc47b821a089f844bfb986d71892
1 /*
2 * $Id$
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 2.0
12 * Copyright (c) 1991-1999
13 * BIOSON Research Institute, Dept. of Biophysical Chemistry
14 * University of Groningen, The Netherlands
16 * Please refer to:
17 * GROMACS: A message-passing parallel molecular dynamics implementation
18 * H.J.C. Berendsen, D. van der Spoel and R. van Drunen
19 * Comp. Phys. Comm. 91, 43-56 (1995)
21 * Also check out our WWW page:
22 * http://md.chem.rug.nl/~gmx
23 * or e-mail to:
24 * gromacs@chem.rug.nl
26 * And Hey:
27 * GRowing Old MAkes el Chrono Sweat
29 static char *SRCID_pdb2gmx_c = "$Id$";
31 #include <time.h>
32 #include <ctype.h>
33 #include "assert.h"
34 #include "sysstuff.h"
35 #include "typedefs.h"
36 #include "smalloc.h"
37 #include "copyrite.h"
38 #include "string2.h"
39 #include "confio.h"
40 #include "symtab.h"
41 #include "vec.h"
42 #include "statutil.h"
43 #include "futil.h"
44 #include "fatal.h"
45 #include "pdbio.h"
46 #include "toputil.h"
47 #include "h_db.h"
48 #include "physics.h"
49 #include "pgutil.h"
50 #include "calch.h"
51 #include "resall.h"
52 #include "pdb2top.h"
53 #include "ter_db.h"
54 #include "strdb.h"
55 #include "gbutil.h"
56 #include "genhydro.h"
57 #include "readinp.h"
58 #include "xlate.h"
59 #include "specbond.h"
60 #include "index.h"
61 #include "hizzie.h"
63 #define NREXCL 3
65 static char *select_res(int nr,int resnr,char *name[],char *expl[],char *title)
67 int sel=0;
69 printf("Which %s type do you want for residue %d\n",title,resnr+1);
70 for(sel=0; (sel < nr); sel++)
71 printf("%d. %s (%s)\n",sel,expl[sel],name[sel]);
72 printf("\nType a number:"); fflush(stdout);
74 if (scanf("%d",&sel) != 1)
75 fatal_error(0,"Answer me for res %s %d!",title,resnr+1);
77 return name[sel];
80 static char *get_asptp(int resnr)
82 enum { easp, easpH, easpNR };
83 static char *lh[easpNR] = { "ASP", "ASPH" };
84 static char *expl[easpNR] = {
85 "Not protonated (charge -1)",
86 "Protonated (charge 0)"
89 return select_res(easpNR,resnr,lh,expl,"ASPARTIC ACID");
92 static char *get_glutp(int resnr)
94 enum { eglu, egluH, egluNR };
95 static char *lh[egluNR] = { "GLU", "GLUH" };
96 static char *expl[egluNR] = {
97 "Not protonated (charge -1)",
98 "Protonated (charge 0)"
101 return select_res(egluNR,resnr,lh,expl,"GLUTAMIC ACID");
104 static char *get_lystp(int resnr)
106 enum { elys, elysH, elysNR };
107 static char *lh[elysNR] = { "LYS", "LYSH" };
108 static char *expl[elysNR] = {
109 "Not protonated (charge 0)",
110 "Protonated (charge +1)"
113 return select_res(elysNR,resnr,lh,expl,"LYSINE");
116 static char *get_cystp(int resnr)
118 enum { ecys, ecysH, ecysNR };
119 static char *lh[ecysNR] = { "CYS", "CYSH" };
120 static char *expl[ecysNR] = {
121 "Cysteine in disulfide bridge",
122 "Protonated"
125 return select_res(ecysNR,resnr,lh,expl,"CYSTEINE");
129 static char *get_histp(int resnr)
131 static char *expl[ehisNR] = {
132 "H on ND1 only",
133 "H on NE2 only",
134 "H on ND1 and NE2",
135 "Coupled to Heme"
138 return select_res(ehisNR,resnr,hh,expl,"HISTIDINE");
141 static void rename_pdbres(t_atoms *pdba,char *oldnm,char *newnm,
142 bool bFullCompare)
144 char *resnm;
145 int i;
147 for(i=0; (i<pdba->nres); i++) {
148 resnm=*pdba->resname[i];
149 if ((bFullCompare && (strcasecmp(resnm,oldnm) == 0)) ||
150 (!bFullCompare && strstr(resnm,oldnm) != NULL)) {
151 sfree(*pdba->resname[i]);
152 *pdba->resname[i]=strdup(newnm);
157 static void rename_pdbresint(t_atoms *pdba,char *oldnm,
158 char *gettp(int),bool bFullCompare)
160 int i;
161 char *ptr,*resnm;
163 for(i=0; i<pdba->nres; i++) {
164 resnm=*pdba->resname[i];
165 if ((bFullCompare && (strcmp(resnm,oldnm) == 0)) ||
166 (!bFullCompare && strstr(resnm,oldnm) != NULL)) {
167 ptr=gettp(i);
168 sfree(*pdba->resname[i]);
169 *pdba->resname[i]=strdup(ptr);
174 static void check_occupancy(t_atoms *atoms,char *filename)
176 int i,ftp;
177 int nzero=0;
178 int nnotone=0;
180 ftp = fn2ftp(filename);
181 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
182 fprintf(stderr,"No occupancies in %s\n",filename);
183 else {
184 for(i=0; (i<atoms->nr); i++) {
185 if (atoms->pdbinfo[i].occup == 0)
186 nzero++;
187 else if (atoms->pdbinfo[i].occup != 1)
188 nnotone++;
190 if (nzero == atoms->nr)
191 fprintf(stderr,"All occupancy fields zero. This is probably not an X-Ray structure\n");
192 else if ((nzero > 0) || (nnotone > 0))
193 fprintf(stderr,
194 "WARNING: there were %d atoms with zero occupancy and %d atoms"
195 " with\n occupancy unequal to one (out of %d atoms)."
196 " Check your pdb file.\n",nzero,nnotone,atoms->nr);
197 else
198 fprintf(stderr,"All occupancies are one\n");
202 void write_posres(char *fn,t_atoms *pdba)
204 FILE *fp;
205 int i;
207 fp=ffopen(fn,"w");
208 fprintf(fp,
209 "; In this topology include file, you will find position restraint\n"
210 "; entries for all the heavy atoms in your original pdb file.\n"
211 "; This means that all the protons which were added by pdb2gmx are\n"
212 "; not restrained.\n"
213 "\n"
214 "[ position_restraints ]\n"
215 "; %4s%6s%8s%8s%8s\n","atom","type","fx","fy","fz"
217 for(i=0; (i<pdba->nr); i++) {
218 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
219 fprintf(fp,"%6d%6d%8.1f%8.1f%8.1f\n",i+1,1,1000.0,1000.0,1000.0);
221 ffclose(fp);
224 int read_pdball(char *inf, char *outf,char *title,
225 t_atoms *atoms, rvec **x,matrix box, bool bRetainH)
226 /* Read a pdb file. (containing proteins) */
228 int natom,new_natom,i;
230 /* READ IT */
231 printf("Reading %s...\n",inf);
232 get_stx_coordnum(inf,&natom);
233 init_t_atoms(atoms,natom,TRUE);
234 snew(*x,natom);
235 read_stx_conf(inf,title,atoms,*x,NULL,box);
236 if (!bRetainH) {
237 new_natom=0;
238 for(i=0; i<atoms->nr; i++)
239 if (!is_hydrogen(*atoms->atomname[i])) {
240 atoms->atom[new_natom]=atoms->atom[i];
241 atoms->atomname[new_natom]=atoms->atomname[i];
242 atoms->pdbinfo[new_natom]=atoms->pdbinfo[i];
243 copy_rvec((*x)[i],(*x)[new_natom]);
244 new_natom++;
246 atoms->nr=new_natom;
247 natom=new_natom;
250 printf("Read");
251 if (title && title[0])
252 printf(" '%s',",title);
253 printf(" %d atoms\n",natom);
255 /* Rename residues */
256 rename_pdbres(atoms,"SOL","HOH",FALSE);
257 rename_pdbres(atoms,"WAT","HOH",FALSE);
258 rename_pdbres(atoms,"HEM","HEME",FALSE);
260 rename_atoms(atoms);
262 if (natom == 0)
263 return 0;
265 if (outf)
266 write_sto_conf(outf,title,atoms,*x,NULL,box);
268 return natom;
271 void process_chain(t_atoms *pdba, rvec *x,
272 bool bTrpU,bool bPheU,bool bTyrU,
273 bool bLysMan,bool bAspMan,bool bGluMan,
274 bool bHisMan,bool bCysMan,
275 int *nssbonds,t_ssbond **ssbonds,
276 real angle,real distance)
278 /* Rename aromatics, lys, asp and histidine */
279 if (bTyrU) rename_pdbres(pdba,"TYR","TYRU",FALSE);
280 if (bTrpU) rename_pdbres(pdba,"TRP","TRPU",FALSE);
281 if (bPheU) rename_pdbres(pdba,"PHE","PHEU",FALSE);
282 if (bLysMan)
283 rename_pdbresint(pdba,"LYS",get_lystp,FALSE);
284 else
285 rename_pdbres(pdba,"LYS","LYSH",FALSE);
286 if (bAspMan)
287 rename_pdbresint(pdba,"ASP",get_asptp,FALSE);
288 else
289 rename_pdbres(pdba,"ASPH","ASP",FALSE);
290 if (bGluMan)
291 rename_pdbresint(pdba,"GLU",get_glutp,FALSE);
292 else
293 rename_pdbres(pdba,"GLUH","GLU",FALSE);
295 /* Make sure we don't have things like CYS? */
296 rename_pdbres(pdba,"CYS","CYS",FALSE);
297 *nssbonds=mk_specbonds(pdba,x,bCysMan,ssbonds);
298 rename_pdbres(pdba,"CYS","CYSH",TRUE);
300 if (!bHisMan)
301 set_histp(pdba,x,angle,distance);
302 else
303 rename_pdbresint(pdba,"HIS",get_histp,TRUE);
306 /* struct for sorting the atoms from the pdb file */
307 typedef struct {
308 int resnr; /* residue number */
309 int j; /* database order index */
310 int index; /* original atom number */
311 char anm1; /* second letter of atom name */
312 char altloc; /* alternate location indicator */
313 } t_pdbindex;
315 int pdbicomp(const void *a,const void *b)
317 t_pdbindex *pa,*pb;
318 int d;
320 pa=(t_pdbindex *)a;
321 pb=(t_pdbindex *)b;
323 d = (pa->resnr - pb->resnr);
324 if (d==0) {
325 d = (pa->j - pb->j);
326 if (d==0) {
327 d = (pa->anm1 - pb->anm1);
328 if (d==0)
329 d = (pa->altloc - pb->altloc);
333 return d;
336 static void sort_pdbatoms(int nrtp,t_restp restp[],
337 int natoms,t_atoms **pdbaptr,rvec **x,
338 t_block *block,char ***gnames)
340 t_atoms *pdba,*pdbnew;
341 rvec **xnew;
342 int i,j;
343 t_restp *rptr;
344 t_pdbindex *pdbi;
345 atom_id *a;
346 char *atomnm,*resnm;
348 pdba=*pdbaptr;
349 natoms=pdba->nr;
350 pdbnew=NULL;
351 snew(xnew,1);
352 snew(pdbi, natoms);
354 for(i=0; i<natoms; i++) {
355 atomnm=*pdba->atomname[i];
356 resnm=*pdba->resname[pdba->atom[i].resnr];
357 if ((rptr=search_rtp(resnm,nrtp,restp)) == NULL)
358 fatal_error(0,"Residue type %s not found",resnm);
359 for(j=0; (j<rptr->natom); j++)
360 if (strcasecmp(atomnm,*(rptr->atomname[j])) == 0)
361 break;
362 if (j==rptr->natom) {
363 if ( ( ( pdba->atom[i].resnr == 0) && (atomnm[0] == 'H') &&
364 ( (atomnm[1] == '1') || (atomnm[1] == '2') ||
365 (atomnm[1] == '3') ) ) )
366 j=1;
367 else {
368 char buf[STRLEN];
370 sprintf(buf,"Atom %s in residue %s %d not found in database\n"
371 " while sorting atoms%n",atomnm,
372 rptr->resname,pdba->atom[i].resnr+1,&i);
373 if ( is_hydrogen(atomnm) )
374 sprintf(buf+i,". Maybe different protonation state.\n"
375 " Remove this hydrogen or choose a different "
376 "protonation state.");
377 fatal_error(0,buf);
380 /* make shadow array to be sorted into indexgroup */
381 pdbi[i].resnr = pdba->atom[i].resnr;
382 pdbi[i].j = j;
383 pdbi[i].index = i;
384 pdbi[i].anm1 = atomnm[1];
385 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
387 qsort(pdbi,natoms,(size_t)sizeof(pdbi[0]),pdbicomp);
389 /* pdba is sorted in pdbnew using the pdbi index */
390 snew(a,natoms);
391 snew(pdbnew,1);
392 init_t_atoms(pdbnew,natoms,TRUE);
393 snew(*xnew,natoms);
394 pdbnew->nr=pdba->nr;
395 pdbnew->nres=pdba->nres;
396 sfree(pdbnew->resname);
397 pdbnew->resname=pdba->resname;
398 for (i=0; i<natoms; i++) {
399 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
400 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
401 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
402 copy_rvec((*x)[pdbi[i].index],(*xnew)[i]);
403 /* make indexgroup in block */
404 a[i]=pdbi[i].index;
406 /* clean up */
407 sfree(pdba->atomname);
408 sfree(pdba->atom);
409 sfree(pdba->pdbinfo);
410 done_block(&pdba->excl);
411 sfree(pdba);
412 sfree(*x);
413 /* copy the sorted pdbnew back to pdba */
414 *pdbaptr=pdbnew;
415 *x=*xnew;
416 add_grp(block, gnames, natoms, a, "prot_sort");
417 sfree(xnew);
418 sfree(a);
419 sfree(pdbi);
422 static int remove_duplicate_atoms(t_atoms *pdba,rvec x[])
424 int i,j,oldnatoms;
426 printf("Checking for duplicate atoms....\n");
427 oldnatoms = pdba->nr;
429 /* NOTE: pdba->nr is modified inside the loop */
430 for(i=1; (i < pdba->nr); i++) {
431 /* compare 'i' and 'i-1', throw away 'i' if they are identical
432 this is a 'while' because multiple alternate locations can be present */
433 while ( (i < pdba->nr) &&
434 (pdba->atom[i-1].resnr == pdba->atom[i].resnr) &&
435 (strcmp(*pdba->atomname[i-1],*pdba->atomname[i])==0) ) {
436 printf("deleting duplicate atom %4s %s%4d",
437 *pdba->atomname[i], *pdba->resname[pdba->atom[i].resnr],
438 pdba->atom[i].resnr+1);
439 if (pdba->atom[i].chain && (pdba->atom[i].chain!=' '))
440 printf(" ch %c", pdba->atom[i].chain);
441 if (pdba->pdbinfo) {
442 if (pdba->pdbinfo[i].atomnr)
443 printf(" pdb nr %4d",pdba->pdbinfo[i].atomnr);
444 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc!=' '))
445 printf(" altloc %c",pdba->pdbinfo[i].altloc);
447 printf("\n");
448 pdba->nr--;
449 sfree(pdba->atomname[i]);
450 for (j=i; j < pdba->nr; j++) {
451 pdba->atom[j] = pdba->atom[j+1];
452 pdba->atomname[j] = pdba->atomname[j+1];
453 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
454 copy_rvec(x[j+1],x[j]);
456 srenew(pdba->atom, pdba->nr);
457 srenew(pdba->atomname, pdba->nr);
458 srenew(pdba->pdbinfo, pdba->nr);
461 if (pdba->nr != oldnatoms)
462 printf("Now there are %d atoms\n",pdba->nr);
464 return pdba->nr;
467 void find_nc_ter(t_atoms *pdba,int *rn,int *rc)
469 int rnr;
471 *rn=-1;
472 *rc=-1;
473 for(rnr=0; (rnr<pdba->nres); rnr++) {
474 if ((*rn == -1) && (is_protein(*pdba->resname[rnr])))
475 *rn=rnr;
476 if ((*rc != rnr) && (is_protein(*pdba->resname[rnr])))
477 *rc=rnr;
479 if (debug) fprintf(debug,"nres: %d, rN: %d, rC: %d\n",pdba->nres,*rn,*rc);
482 int main(int argc, char *argv[])
484 static char *desc[] = {
485 "This program reads a pdb file, lets you choose a forcefield, reads",
486 "some database files, adds hydrogens to the molecules and generates",
487 "coordinates in Gromacs (Gromos) format and a topology in Gromacs format.",
488 "These files can subsequently be processed to generate a run input file.",
489 "[PAR]",
491 "Note that a pdb file is nothing more than a file format, and it",
492 "need not necessarily contain a protein structure. Every kind of",
493 "molecule for which there is support in the database can be converted.",
494 "If there is no support in the database, you can add it yourself.[PAR]",
496 "The program has limited intelligence, it reads a number of database",
497 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
498 "if necessary this can be done manually. The program can prompt the",
499 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue she",
500 "wants. For LYS the choice is between LYS (two protons on NZ) or LYSH",
501 "(three protons, default), for ASP and GLU unprotonated (default) or",
502 "protonated, for HIS the proton can be either on ND1 (HISA), on NE2",
503 "(HISB) or on both (HISH). By default these selections are done",
504 "automatically. For His, this is based on an optimal hydrogen bonding",
505 "conformation. Hydrogen bonds are defined based on a simple geometric",
506 "criterium, specified by the maximum hydrogen-donor-acceptor angle",
507 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
508 "[TT]-dist[tt] respectively.[PAR]",
510 "pdb2gmx will also check the occupancy field of the pdb file.",
511 "If any of the occupanccies are not one, indicating that the atom is",
512 "not resolved well in the structure, a warning message is issued.",
513 "When a pdb file does not originate from an X-Ray structure determination",
514 "all occupancy fields may be zero. Either way, it is up to the user",
515 "to verify the correctness of the input data (read the article!).[PAR]",
517 "During processing the atoms will be reordered according to Gromacs",
518 "conventions. With [TT]-n[tt] an index file can be generated that",
519 "contains one group reordered in the same way. This allows you to",
520 "convert a Gromos trajectory and coordinate file to Gromos. There is",
521 "one limitation: reordering is done after the hydrogens are stripped",
522 "from the input and before new hydrogens are added. This means that",
523 "should not turn off [TT]-reth[tt].[PAR]",
525 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
526 "identifiers. Therefore it is useful to enter a pdb file name at",
527 "the [TT]-o[tt] option when you want to convert a multichain pdb file.",
528 "[PAR]",
530 "When using [TT]-reth[tt] to keep all hydrogens from the [TT].pdb[tt]",
531 "file, the names of the hydrogens in the [TT].pdb[tt] file [IT]must[it]",
532 "match the names in the database.[PAR]",
534 "[TT]-sort[tt] will sort all residues according to the order in the",
535 "database, sometimes this is necessary to get charge groups",
536 "together.[PAR]",
538 "[TT]-alldih[tt] will generate all proper dihedrals instead of only",
539 "those with as few hydrogens as possible, this is useful for use with",
540 "the Charmm forcefield.[PAR]",
542 "The option [TT]-dummy[tt] removes hydrogen and fast improper dihedral",
543 "motions. Angular and out-of-plane motions can be removed by changing",
544 "hydrogens into dummy atoms and fixing angles, which fixes their",
545 "position relative to neighboring atoms. Additionally, all atoms in the",
546 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
547 "can be converted into dummy atoms, elminating the fast improper dihedral",
548 "fluctuations in these rings. Note that in this case all other hydrogen",
549 "atoms are also converted to dummy atoms. The mass of all atoms that are",
550 "converted into dummy atoms, is added to the heavy atoms.[PAR]",
551 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
552 "done by increasing the hydrogen-mass by a factor of 4. This is also",
553 "done for water hydrogens to slow down the rotational motion of water.",
554 "The increase in mass of the hydrogens is subtracted from the bonded",
555 "(heavy) atom so that the total mass of the system remains the same."
558 typedef struct {
559 char chain;
560 int start;
561 int natom;
562 bool bAllWat;
563 } t_pdbchain;
565 typedef struct {
566 char chain;
567 bool bAllWat;
568 t_atoms *pdba;
569 rvec *x;
570 } t_chain;
572 FILE *fp,*top_file,*top_file2,*itp_file=NULL;
573 int natom,nres;
574 t_atoms pdba_all,*pdba;
575 t_atoms *atoms;
576 t_block *block;
577 int chain,nchain,nwaterchain;
578 t_pdbchain *pdbchains;
579 t_chain *chains;
580 char pchain;
581 int nincl,nmol;
582 char **incls;
583 t_mols *mols;
584 char **gnames;
585 matrix box;
586 rvec box_space;
587 char *ff;
588 int i,j,k,l,nrtp,rN,rC;
589 int *swap_index,si;
590 int bts[ebtsNR];
591 t_restp *restp;
592 t_hackblock *ah;
593 t_symtab tab;
594 t_atomtype *atype;
595 char fn[256],*top_fn,itp_fn[STRLEN],posre_fn[STRLEN];
596 char molname[STRLEN],title[STRLEN];
597 char *c;
598 int nah,nNtdb,nCtdb;
599 t_hackblock *ntdb,*ctdb,*sel_ntdb=NULL,*sel_ctdb=NULL;
600 int nssbonds;
601 t_ssbond *ssbonds;
602 rvec *pdbx,*x;
603 bool bUsed,bDummies=FALSE,bWat,bPrevWat=FALSE,bITP,bDummyAromatics=FALSE;
604 real mHmult=0;
606 t_filenm fnm[] = {
607 { efSTX, "-f", "eiwit.pdb", ffREAD },
608 { efSTO, "-o", "conf", ffWRITE },
609 { efTOP, NULL, NULL, ffWRITE },
610 { efITP, "-i", "posre", ffWRITE },
611 { efNDX, "-n", "clean", ffOPTWR },
612 { efSTO, "-q", "clean.pdb", ffOPTWR }
614 #define NFILE asize(fnm)
616 /* Command line arguments msut be static */
617 static bool bNewRTP=FALSE;
618 static bool bInter=FALSE, bCysMan=FALSE;
619 static bool bLysMan=FALSE, bAspMan=FALSE, bGluMan=FALSE, bHisMan=FALSE;
620 static bool bTerMan=FALSE, bUnA=FALSE, bHeavyH;
621 static bool bH14=FALSE, bSort=TRUE, bRetainH=TRUE;
622 static bool bAlldih=FALSE;
623 static real angle=135.0, distance=0.3;
624 static real long_bond_dist=0.25, short_bond_dist=0.05;
625 static char *dumstr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
626 t_pargs pa[] = {
627 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
628 "HIDDENWrite the residue database in new format to 'new.rtp'"},
629 { "-lb", FALSE, etREAL, {&long_bond_dist},
630 "HIDDENLong bond warning distance" },
631 { "-sb", FALSE, etREAL, {&short_bond_dist},
632 "HIDDENShort bond warning distance" },
633 { "-inter", FALSE, etBOOL, {&bInter},
634 "Set the next 6 options to interactive"},
635 { "-ss", FALSE, etBOOL, {&bCysMan},
636 "Interactive SS bridge selection" },
637 { "-ter", FALSE, etBOOL, {&bTerMan},
638 "Interactive termini selection, iso charged" },
639 { "-lys", FALSE, etBOOL, {&bLysMan},
640 "Interactive Lysine selection, iso charged" },
641 { "-asp", FALSE, etBOOL, {&bAspMan},
642 "Interactive Aspartic Acid selection, iso charged" },
643 { "-glu", FALSE, etBOOL, {&bGluMan},
644 "Interactive Glutamic Acid selection, iso charged" },
645 { "-his", FALSE, etBOOL, {&bHisMan},
646 "Interactive Histidine selection, iso checking H-bonds" },
647 { "-angle", FALSE, etREAL, {&angle},
648 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
649 { "-dist", FALSE, etREAL, {&distance},
650 "Maximum donor-acceptor distance for a H-bond (nm)" },
651 { "-una", FALSE, etBOOL, {&bUnA},
652 "Select aromatic rings with united CH atoms on Phenylalanine, "
653 "Tryptophane and Tyrosine" },
654 { "-sort", FALSE, etBOOL, {&bSort},
655 "Sort the residues according to database" },
656 { "-H14", FALSE, etBOOL, {&bH14},
657 "Use 3rd neighbor interactions for hydrogen atoms" },
658 { "-reth", FALSE, etBOOL, {&bRetainH},
659 "Retain hydrogen atoms that are in the pdb file" },
660 { "-alldih", FALSE, etBOOL, {&bAlldih},
661 "Generate all proper dihedrals" },
662 { "-dummy", FALSE, etENUM, {dumstr},
663 "Convert atoms to dummy atoms" },
664 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
665 "Make hydrogen atoms heavy" }
667 #define NPARGS asize(pa)
669 CopyRight(stderr,argv[0]);
670 parse_common_args(&argc,argv,0,FALSE,NFILE,fnm,asize(pa),pa,asize(desc),desc,
671 0,NULL);
672 if (bInter) {
673 /* if anything changes here, also change description of -inter */
674 bCysMan = TRUE;
675 bTerMan = TRUE;
676 bLysMan = TRUE;
677 bAspMan = TRUE;
678 bGluMan = TRUE;
679 bHisMan = TRUE;
682 if (bHeavyH)
683 mHmult=4.0;
684 else
685 mHmult=1.0;
687 switch(dumstr[0][0]) {
688 case 'n': /* none */
689 bDummies=FALSE;
690 bDummyAromatics=FALSE;
691 break;
692 case 'h': /* hydrogens */
693 bDummies=TRUE;
694 bDummyAromatics=FALSE;
695 break;
696 case 'a': /* aromatics */
697 bDummies=TRUE;
698 bDummyAromatics=TRUE;
699 break;
700 default:
701 fatal_error(0,"DEATH HORROR in $s (%d): dumstr[0]='%s'",
702 __FILE__,__LINE__,dumstr[0]);
703 }/* end switch */
705 clear_mat(box);
706 natom=read_pdball(opt2fn("-f",NFILE,fnm),opt2fn_null("-q",NFILE,fnm),title,
707 &pdba_all,&pdbx,box,bRetainH);
709 if (natom==0)
710 fatal_error(0,"No atoms found in pdb file %s\n",opt2fn("-f",NFILE,fnm));
712 printf("Analyzing pdb file\n");
713 nchain=0;
714 nwaterchain=0;
715 /* keep the compiler happy */
716 pchain='?';
717 pdbchains=NULL;
718 for (i=0; (i<natom); i++) {
719 bWat = strcasecmp(*pdba_all.resname[pdba_all.atom[i].resnr],"HOH") == 0;
720 if ((i==0) || (pdba_all.atom[i].chain!=pchain) || (bWat != bPrevWat)) {
721 pchain=pdba_all.atom[i].chain;
722 /* set natom for previous chain */
723 if (nchain > 0)
724 pdbchains[nchain-1].natom=i-pdbchains[nchain-1].start;
725 if (bWat) {
726 nwaterchain++;
727 pdba_all.atom[i].chain='\0';
729 /* check if chain identifier was used before */
730 for (j=0; (j<nchain); j++)
731 if ((pdbchains[j].chain != '\0') && (pdbchains[j].chain != ' ') &&
732 (pdbchains[j].chain == pdba_all.atom[i].chain))
733 fatal_error(0,"Chain identifier '%c' was used "
734 "in two non-sequential blocks (residue %d, atom %d)",
735 pdba_all.atom[i].chain,pdba_all.atom[i].resnr+1,i+1);
736 srenew(pdbchains,nchain+1);
737 pdbchains[nchain].chain=pdba_all.atom[i].chain;
738 pdbchains[nchain].start=i;
739 pdbchains[nchain].bAllWat=bWat;
740 nchain++;
742 bPrevWat=bWat;
744 pdbchains[nchain-1].natom=natom-pdbchains[nchain-1].start;
746 /* set all the water blocks at the end of the chain */
747 snew(swap_index,nchain);
748 j=0;
749 for(i=0; i<nchain; i++)
750 if (!pdbchains[i].bAllWat) {
751 swap_index[j]=i;
752 j++;
754 for(i=0; i<nchain; i++)
755 if (pdbchains[i].bAllWat) {
756 swap_index[j]=i;
757 j++;
759 if (nwaterchain>1)
760 printf("Moved all the water blocks to the end\n");
762 snew(chains,nchain);
763 /* copy pdb data and x for all chains */
764 for (i=0; (i<nchain); i++) {
765 si=swap_index[i];
766 chains[i].chain = pdbchains[si].chain;
767 chains[i].bAllWat = pdbchains[si].bAllWat;
768 /* check for empty chain identifiers */
769 if ((nchain-nwaterchain>1) && !pdbchains[si].bAllWat &&
770 ((chains[i].chain=='\0') || (chains[i].chain==' '))) {
771 bUsed=TRUE;
772 for(k='A'; (k<='Z') && bUsed; k++) {
773 bUsed=FALSE;
774 for(j=0; j<nchain; j++)
775 bUsed=bUsed || (!pdbchains[j].bAllWat && (chains[j].chain==k));
776 if (!bUsed) {
777 printf("Gave chain %d chain identifier '%c'\n",i+1,k);
778 chains[i].chain=k;
782 snew(chains[i].pdba,1);
783 init_t_atoms(chains[i].pdba,pdbchains[si].natom,TRUE);
784 snew(chains[i].x,chains[i].pdba->nr);
785 for (j=0; j<chains[i].pdba->nr; j++) {
786 chains[i].pdba->atom[j]=pdba_all.atom[pdbchains[si].start+j];
787 snew(chains[i].pdba->atomname[j],1);
788 *chains[i].pdba->atomname[j] =
789 strdup(*pdba_all.atomname[pdbchains[si].start+j]);
790 /* make all chain identifiers equal to that off the chain */
791 chains[i].pdba->atom[j].chain=pdbchains[si].chain;
792 chains[i].pdba->pdbinfo[j]=pdba_all.pdbinfo[pdbchains[si].start+j];
793 copy_rvec(pdbx[pdbchains[si].start+j],chains[i].x[j]);
795 /* Renumber the residues assuming that the numbers are continuous */
796 k = chains[i].pdba->atom[0].resnr;
797 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resnr - k + 1;
798 chains[i].pdba->nres = nres;
799 for(j=0; j < chains[i].pdba->nr; j++)
800 chains[i].pdba->atom[j].resnr -= k;
801 srenew(chains[i].pdba->resname,nres);
802 for(j=0; j<nres; j++) {
803 snew(chains[i].pdba->resname[j],1);
804 *chains[i].pdba->resname[j] = strdup(*pdba_all.resname[k+j]);
808 printf("There are %d chains and %d blocks of water and "
809 "%d residues with %d atoms\n",
810 nchain-nwaterchain,nwaterchain,
811 pdba_all.atom[natom-1].resnr+1,natom);
813 printf("\n %5s %4s %6s\n","chain","#res","#atoms");
814 for (i=0; (i<nchain); i++)
815 printf(" %d '%c'%s %4d %6d %s\n",
816 i+1, chains[i].chain, chains[i].chain?"":" ",
817 chains[i].pdba->nres, chains[i].pdba->nr,
818 chains[i].bAllWat ? "(only water)":"");
819 printf("\n");
821 check_occupancy(&pdba_all,opt2fn("-f",NFILE,fnm));
823 ff=choose_ff();
824 printf("Using %s force field\n",ff);
826 /* Read atomtypes... */
827 open_symtab(&tab);
828 atype=read_atype(ff,&tab);
830 /* read residue database */
831 printf("Reading residue database... (%s)\n",ff);
832 nrtp=read_resall(ff,bts,&restp,atype,&tab);
833 if (bNewRTP) {
834 fp=ffopen("new.rtp","w");
835 print_resall(fp,bts,nrtp,restp,atype);
836 fclose(fp);
839 /* read hydrogen database */
840 nah=read_h_db(ff,&ah);
842 /* Read Termini database... */
843 sprintf(fn,"%s-n.tdb",ff);
844 nNtdb=read_ter_db(fn,&ntdb,atype);
845 sprintf(fn,"%s-c.tdb",ff);
846 nCtdb=read_ter_db(fn,&ctdb,atype);
848 top_fn=ftp2fn(efTOP,NFILE,fnm);
849 top_file=ffopen(top_fn,"w");
850 print_top_header(top_file,title,FALSE,ff,mHmult);
852 nincl=0;
853 nmol=0;
854 incls=NULL;
855 mols=NULL;
856 nres=0;
857 for(chain=0; (chain<nchain); chain++) {
858 /* set pdba, natom and nres to the current chain */
859 pdba =chains[chain].pdba;
860 x =chains[chain].x;
861 natom=chains[chain].pdba->nr;
862 nres =chains[chain].pdba->nres;
864 if (chains[chain].chain && ( chains[chain].chain != ' ' ) )
865 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
866 chain+1,chains[chain].chain,natom,nres);
867 else
868 printf("Processing chain %d (%d atoms, %d residues)\n",
869 chain+1,natom,nres);
871 process_chain(pdba,x,bUnA,bUnA,bUnA,bLysMan,bAspMan,bGluMan,
872 bHisMan,bCysMan,&nssbonds,&ssbonds,angle,distance);
874 if (bSort) {
875 block = new_block();
876 snew(gnames,1);
877 sort_pdbatoms(nrtp,restp,natom,&pdba,&x,block,&gnames);
878 natom = remove_duplicate_atoms(pdba,x);
879 if (ftp2bSet(efNDX,NFILE,fnm)) {
880 if (!bRetainH)
881 fprintf(stderr,"WARNING: without the -reth option the generated "
882 "index file (%s) might be useless\n"
883 "(the index file is generated before hydrogens are added)",
884 ftp2fn(efNDX,NFILE,fnm));
885 write_index(ftp2fn(efNDX,NFILE,fnm),block,gnames);
887 for(i=0; i < block->nr; i++)
888 sfree(gnames[i]);
889 sfree(gnames);
890 done_block(block);
891 } else
892 fprintf(stderr,"WARNING: "
893 "without sorting no check for duplicate atoms can be done\n");
895 if (debug) {
896 if ( chains[chain].chain == '\0' || chains[chain].chain == ' ')
897 sprintf(fn,"chain.pdb");
898 else
899 sprintf(fn,"chain_%c.pdb",chains[chain].chain);
900 write_sto_conf(fn,title,pdba,x,NULL,box);
903 find_nc_ter(pdba,&rN,&rC);
905 if ( (rN<0) || (rC<0) ) {
906 printf("No N- or C-terminus found: "
907 "this chain appears to contain no protein\n");
908 } else {
909 /* set termini */
910 if ( (rN>=0) && (bTerMan || (nNtdb<4)) )
911 sel_ntdb=choose_ter(nNtdb,ntdb,"Select N-terminus type (start)");
912 else
913 if (strncmp(*pdba->resname[pdba->atom[rN].resnr],"PRO",3))
914 sel_ntdb=&(ntdb[1]);
915 else
916 sel_ntdb=&(ntdb[3]);
917 printf("N-terminus: %s\n",sel_ntdb->name);
919 if ( (rC>=0) && (bTerMan || (nCtdb<2)) )
920 sel_ctdb=choose_ter(nCtdb,ctdb,"Select C-terminus type (end)");
921 else
922 sel_ctdb=&(ctdb[1]);
923 printf("C-terminus: %s\n",sel_ctdb->name);
926 /* Generate Hydrogen atoms (and termini) in the sequence */
927 natom=add_h(&pdba,&x,nah,ah,sel_ntdb,sel_ctdb,rN,rC);
928 printf("Now there are %d residues with %d atoms\n",
929 pdba->nres,pdba->nr);
930 if (debug) write_pdbfile(debug,title,pdba,x,box,0,TRUE);
932 if (debug)
933 for(i=0; (i<natom); i++)
934 fprintf(debug,"Res %s%d atom %d %s\n",
935 *(pdba->resname[pdba->atom[i].resnr]),
936 pdba->atom[i].resnr+1,i+1,*pdba->atomname[i]);
938 strcpy(posre_fn,ftp2fn(efITP,NFILE,fnm));
940 /* make up molecule name(s) */
941 if (chains[chain].bAllWat)
942 sprintf(molname,"Water");
943 else if ( chains[chain].chain == '\0' || chains[chain].chain == ' ' )
944 sprintf(molname,"Protein");
945 else
946 sprintf(molname,"Protein_%c",chains[chain].chain);
948 if ((nchain-nwaterchain>1) && !chains[chain].bAllWat) {
949 bITP=TRUE;
950 strcpy(itp_fn,top_fn);
951 printf("Chain time...\n");
952 c=strrchr(itp_fn,'.');
953 if ( chains[chain].chain == '\0' || chains[chain].chain == ' ' )
954 sprintf(c,".itp");
955 else
956 sprintf(c,"_%c.itp",chains[chain].chain);
957 c=strrchr(posre_fn,'.');
958 if ( chains[chain].chain == '\0' || chains[chain].chain == ' ' )
959 sprintf(c,".itp");
960 else
961 sprintf(c,"_%c.itp",chains[chain].chain);
963 nincl++;
964 srenew(incls,nincl);
965 incls[nincl-1]=strdup(itp_fn);
966 itp_file=ffopen(itp_fn,"w");
967 } else
968 bITP=FALSE;
970 srenew(mols,nmol+1);
971 if (chains[chain].bAllWat) {
972 mols[nmol].name = strdup("SOL");
973 mols[nmol].nr = pdba->nres;
974 } else {
975 mols[nmol].name = strdup(molname);
976 mols[nmol].nr = 1;
978 nmol++;
980 if (bITP)
981 print_top_comment(itp_file,title,TRUE);
983 if (chains[chain].bAllWat)
984 top_file2=NULL;
985 else
986 if (bITP)
987 top_file2=itp_file;
988 else
989 top_file2=top_file;
991 pdb2top(top_file2,posre_fn,molname,pdba,&x,atype,&tab,bts,nrtp,restp,
992 sel_ntdb,sel_ctdb,bH14,rN,rC,bAlldih,
993 bDummies,bDummyAromatics,mHmult,nssbonds,ssbonds,NREXCL,
994 long_bond_dist, short_bond_dist);
996 if (!chains[chain].bAllWat)
997 write_posres(posre_fn,pdba);
999 if (bITP)
1000 fclose(itp_file);
1002 /* pdba and natom have been reassigned somewhere so: */
1003 chains[chain].pdba = pdba;
1004 chains[chain].x = x;
1006 if (debug) {
1007 if ( chains[chain].chain == '\0' || chains[chain].chain == ' ' )
1008 sprintf(fn,"chain.pdb");
1009 else
1010 sprintf(fn,"chain_%c.pdb",chains[chain].chain);
1011 write_sto_conf(fn,cool_quote(),pdba,x,NULL,box);
1015 print_top_mols(top_file,title,nincl,incls,nmol,mols);
1016 fclose(top_file);
1018 /* now merge all chains back together */
1019 natom=0;
1020 nres=0;
1021 for (i=0; (i<nchain); i++) {
1022 natom+=chains[i].pdba->nr;
1023 nres+=chains[i].pdba->nres;
1025 snew(atoms,1);
1026 init_t_atoms(atoms,natom,FALSE);
1027 for(i=0; i < atoms->nres; i++)
1028 sfree(atoms->resname[i]);
1029 sfree(atoms->resname);
1030 atoms->nres=nres;
1031 snew(atoms->resname,nres);
1032 snew(x,natom);
1033 k=0;
1034 l=0;
1035 for (i=0; (i<nchain); i++) {
1036 if (nchain>1)
1037 printf("Including chain %d in system: %d atoms %d residues\n",
1038 i+1,chains[i].pdba->nr,chains[i].pdba->nres);
1039 for (j=0; (j<chains[i].pdba->nr); j++) {
1040 atoms->atom[k]=chains[i].pdba->atom[j];
1041 atoms->atom[k].resnr+=l; /* l is processed nr of residues */
1042 atoms->atomname[k]=chains[i].pdba->atomname[j];
1043 atoms->atom[k].chain=chains[i].chain;
1044 copy_rvec(chains[i].x[j],x[k]);
1045 k++;
1047 for (j=0; (j<chains[i].pdba->nres); j++) {
1048 atoms->resname[l]=chains[i].pdba->resname[j];
1049 l++;
1053 if (nchain>1) {
1054 fprintf(stderr,"Now there are %d atoms and %d residues\n",k,l);
1055 print_sums(atoms, TRUE);
1058 fprintf(stderr,"\nWriting coordinate file...\n");
1059 clear_rvec(box_space);
1060 if (box[0][0] == 0)
1061 gen_box(0,atoms->nr,x,box,box_space,FALSE);
1062 write_sto_conf(ftp2fn(efSTO,NFILE,fnm),title,atoms,x,NULL,box);
1064 thanx(stdout);
1066 return 0;