Improved wallcycle reporting
[gromacs/AngularHB.git] / scripts / xplor2gmx.pl
blob7422551d40f9de34a1f6c07e1b9b58ef83e1d2df
1 #!/usr/bin/perl -w
4 # This script reads an XPLOR input file with distance restraint data
5 # as sometimes is found in the pdb database (http://www.pdb.org).
6 # From this input file dihedral restrints should be removed, such that
7 # only distance restraints are left. The script can handle ambiguous restraints.
8 # It converts the distance restraints to GROMACS format.
9 # A typical command line would be
10 # ./xplor2gmx.pl 33 conf.pdb < restraints.dat > disre.itp
11 # You can turn off debugging info, but please do verify your output is correct!
13 # David van der Spoel (spoel@gromacs.org), July 2002
16 # Turn debugging off (0), or on ( > 0).
17 $debug = 1;
18 # Turn atom name translation on and off
19 $trans = 1;
21 #$res0 = shift;# || die "I need the residue offset\n";
22 $pdb = shift || die "I need the name of the pdb file with correct atom numbers\n";
23 $core = shift || "core.ndx";
24 $tbl = "$ENV{GMXDATA}/gromacs/top/atom_nom.tbl";
26 printf "[ distance_restraints ]\n";
27 printf "; Read an xplor distance restraint file, and output GROMACS distance restraints.\n";
28 printf "; This also needs a pdb file with correct GROMACS atom numbering.\n";
29 printf "; I used $pdb for the atom numbers\n";
30 printf "; This also needs the offset in residues.\n";
31 #printf "; I used $res0 for the residue offset\n";
33 # Read the pdb file
34 # If things go wrong, check whether your pdb file is read correctly.
35 $natom = 0;
36 $nres = 0;
37 @resname = ();
38 open(PDB,$pdb) || die "Can not open file $pdb\n";
39 while ($line = <PDB>) {
40 if (index($line,"ATOM") >= 0) {
41 @tmp = split(' ',$line);
42 $aname[$natom] = $tmp[2];
43 $resnr[$natom] = $tmp[4];
44 if (!defined $resname[$tmp[4]]) {
45 $resname[$tmp[4]] = $tmp[3];
46 $nres++;
48 $natom++;
51 close PDB;
52 printf "; I found $natom atoms in the pdb file $pdb\n";
53 printf "; I found $nres residues in the pdb file $pdb\n";
54 if ($debug > 1) {
55 for ($i = 0; ($i < $natom); $i ++) {
56 printf("; %5d %5s %5s %5d\n",$i+1,$aname[$i],
57 $resname[$resnr[$i]],$resnr[$i]);
62 # Read the name translation table.
63 # Source for this comes from: http://www.bmrb.wisc.edu/ref_info/atom_nom.tbl
64 # It was adapted slightly for GROMACS use, but only as regards formatting,
65 # not for content
67 open(TBL,$tbl) || die "Can not open atom-name translation table $tbl\n";
68 $ntbl=0;
69 while ($line = <TBL>) {
70 @ttt = split('#',$line);
71 @tmp = split(' ',$ttt[0]);
72 if ($#tmp == 3) {
73 # New table entry
74 $tblres[$ntbl] = $tmp[0];
75 $tblxplor[$ntbl] = $tmp[1];
76 $tblgmx[$ntbl] = $tmp[3];
77 $ntbl++;
80 close TBL;
81 printf "; Read $ntbl entries from $tbl\n";
83 @templates = (
84 [ "HA#", "HA1", "HA2" ],
85 [ "HA*", "HA1", "HA2" ],
86 [ "HB#", "HB", "HB1", "HB2" ],
87 [ "HB*", "HB", "HB1", "HB2" ],
88 [ "HG#", "HG", "HG1", "HG2", "HG11", "HG12", "HG13", "HG21", "HG22", "HG23" ],
89 [ "HG*", "HG", "HG1", "HG2", "HG11", "HG12", "HG13", "HG21", "HG22", "HG23" ],
90 [ "HG1#", "HG11", "HG12", "HG13" ],
91 [ "HG1*", "HG11", "HG12", "HG13" ],
92 [ "HG2#", "HG21", "HG22", "HG23" ],
93 [ "HG2*", "HG21", "HG22", "HG23" ],
94 [ "HD#", "HD1", "HD2", "HD11", "HD12", "HD13", "HD21", "HD22", "HD23" ],
95 [ "HD*", "HD1", "HD2", "HD11", "HD12", "HD13", "HD21", "HD22", "HD23" ],
96 [ "HD1#", "HD11", "HD12" ],
97 [ "HD1*", "HD11", "HD12" ],
98 [ "HD2#", "HD21", "HD22" ],
99 [ "HD2*", "HD21", "HD22" ],
100 [ "HE#", "HE", "HE1", "HE2" ],
101 [ "HE*", "HE", "HE1", "HE2" ],
102 [ "HH#", "HH11", "HH12", "HH21", "HH22" ],
103 [ "HH*", "HH11", "HH12", "HH21", "HH22" ],
104 [ "HZ#", "HZ", "HZ1", "HZ2", "HZ3" ],
105 [ "HZ*", "HZ", "HZ1", "HZ2", "HZ3" ],
106 [ "HN", "H" ]
109 $ntranslated = 0;
110 $nuntransltd = 0;
111 sub transl_aname {
112 my $resnm = shift;
113 my $atom = shift;
115 if ( $trans == 1 ) {
116 for(my $i = 0; ($i < $ntbl); $i ++) {
117 if ($tblres[$i] eq $resnm) {
118 if ($tblxplor[$i] eq $atom) {
119 $ntranslated++;
120 return $tblgmx[$i];
125 $nuntransltd++;
126 if ($debug > 1) {
127 printf "; No name change for $resname[$resnr] $atom\n";
129 return $atom;
132 sub expand_template {
133 my $atom = shift(@_);
134 my $rnum = shift(@_);
135 my $bdone = 0;
136 my @atoms;
137 my $jj = 0;
139 die("No residue name for residue $rnum") if (!defined ($resname[$rnum]));
140 for (my $tt=0; (($tt <= $#templates) && ($bdone == 0)); $tt++) {
141 $templ = $templates[$tt];
142 if ($atom eq $templ->[0]) {
143 for ($jj = 1; ($jj <= $#{$templ}); $jj++) {
144 push @atoms, transl_aname($resname[$rnum],$templ->[$jj]);
146 $bdone = 1;
149 if ($bdone == 0) {
150 push @atoms, transl_aname($resname[$rnum],$atom);
152 if ($debug > 0) {
153 my $natom = $#atoms+1;
154 printf("; Found $natom elements for atom $resname[$rnum] %d $atom:",
155 $rnum+1);
156 for $aa ( @atoms ) {
157 printf " $aa";
159 printf "\n";
161 return @atoms;
164 if ($debug > 1) {
165 printf "; There are $#templates template entries\n";
166 for ($tt=0; ($tt <= $#templates); $tt++) {
167 $templ = $templates[$tt];
168 $ntempl = $#{$templ};
169 printf "; Item $tt ($templates[$tt][0]) has $ntempl entries\n";
173 # This index file holds numbers of restraints involving core atoms
174 @protcore = ( "H", "HN", "HA", "HA1", "HA2", "HB", "HB1", "HB2", "HB3", "HG", "HG1", "HG2", "HG3", "N", "O" );
175 open(CORE,">$core") || die "Can not open $core\n";
176 print CORE "[ core-restraints ]\n";
177 $ncore = 0;
179 $myindex = 0;
180 $linec = 0;
181 $npair = 0;
182 $nunresolved = 0;
183 while ($line = <STDIN>) {
184 @ttt = split('!',$line);
185 if ((index($ttt[0],"assign") >= 0) && (index($ttt[0],"!assign") < 0)) {
186 @tmp = split('\(',$ttt[0]);
187 # Find first argument
188 if (($rhaak = index($tmp[1],')')) < 0) {
189 printf "No ) in '$tmp[1]'\n";
191 $atres1 = substr($tmp[1],0,$rhaak);
192 @at1 = split('or',$atres1);
194 # Find second argument
195 if (($rhaak = index($tmp[2],')')) < 0) {
196 printf "No ) in '$tmp[2]'\n";
198 $atres2 = substr($tmp[2],0,$rhaak);
199 @at2 = split('or',$atres2);
201 @expdata = split('\)',$tmp[2]);
202 @dist = split(' ',$expdata[1]);
204 $bOK = 0;
205 $bCore = 1;
206 foreach $a1 ( @at1 ) {
207 @info1 = split(' ',$a1);
208 $r1 = $info1[1];
209 @atoms1 = expand_template($info1[4],$r1);
211 foreach $a2 ( @at2 ) {
212 @info2 = split(' ',$a2);
213 $r2 = $info2[1];
214 @atoms2 = expand_template($info2[4],$r2);
216 for ($i = 0; ($i < $natom) && ($resnr[$i] < $r1); $i++) { ; }
217 for ( ; ($i < $natom) && ($resnr[$i] == $r1); $i++) {
218 foreach $ii ( @atoms1 ) {
219 if ($ii eq $aname[$i]) {
220 $bCoreI = 0;
221 for $pp ( @protcore ) {
222 if ($ii eq $pp) {
223 $bCoreI = 1;
227 for ($j = 0; ($j < $natom) && ($resnr[$j] < $r2); $j++) { ; }
228 for ( ; ($j < $natom) && ($resnr[$j] == $r2); $j++) {
229 foreach $jj ( @atoms2 ) {
230 if ($jj eq $aname[$j]) {
231 $dd = 0.1*$dist[0];
232 $dminus = 0.1*$dist[1];
233 $dplus = 0.1*$dist[2];
234 $low = $dd-$dminus;
235 $up1 = $dd+$dplus;
236 $up2 = $up1+1;
237 printf("%5d %5d %5d %5d %5d %7.3f %7.3f %7.3f 1.0; res $r1 $ii - res $r2 $jj\n",$i+1,$j+1,1,$myindex,1,$low,$up1,$up2);
238 # Do some checks
239 $bOK = 1;
240 $bCoreJ = 0;
241 for $pp ( @protcore ) {
242 if ($jj eq $pp) {
243 $bCoreJ = 1;
246 if (($bCoreI == 0) || ($bCoreJ == 0)) {
247 if ($debug > 0) {
248 printf "; No core $ii ($bCoreI) $jj ($bCoreJ)\n";
250 $bCore = 0;
252 $npair++;
261 if ($bCore == 1) {
262 printf CORE "$myindex\n";
263 $ncore++;
265 if ($bOK == 0) {
266 printf "; UNRESOLVED: $ttt[0]\n";
267 $nunresolved++;
269 else {
270 $myindex++;
273 $linec++;
275 close CORE;
277 printf "; A total of $myindex lines with distance restraints were read from $linec input lines\n";
278 printf "; From this, $npair actual restraints were generated.\n";
279 printf "; A total of $nunresolved lines could not be interpreted\n";
280 printf "; In the process $ntranslated atoms had a name change\n";
281 printf "; However for $nuntransltd no name change could be found\n";
282 printf "; usually these are either HN->H renamings or not-existing atoms\n";
283 printf "; generated by the template expansion (e.g. HG#)\n";
284 printf "; A total of $ncore restraints were classified as core restraints\n";