2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/fileio/confio.h"
45 #include "gromacs/fileio/matio.h"
46 #include "gromacs/fileio/trxio.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/eigio.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/linearalgebra/eigensolver.h"
51 #include "gromacs/math/do_fit.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/pbcutil/pbc.h"
54 #include "gromacs/pbcutil/rmpbc.h"
55 #include "gromacs/topology/index.h"
56 #include "gromacs/topology/topology.h"
57 #include "gromacs/utility/arrayref.h"
58 #include "gromacs/utility/arraysize.h"
59 #include "gromacs/utility/cstringutil.h"
60 #include "gromacs/utility/exceptions.h"
61 #include "gromacs/utility/fatalerror.h"
62 #include "gromacs/utility/futil.h"
63 #include "gromacs/utility/smalloc.h"
64 #include "gromacs/utility/sysinfo.h"
73 /*! \brief Throw an error if any element in index exceeds a given number.
75 * \param[in] indices to be acessed
76 * \param[in] largestOkayIndex to be accessed
77 * \param[in] indexUsagePurpose to be more explicit in the error message
79 * \throws RangeError if largestOkayIndex is larger than any element in indices
82 void throwErrorIfIndexOutOfBounds(ArrayRef
<const int> indices
,
83 const int largestOkayIndex
,
84 const std::string
& indexUsagePurpose
)
86 // do nothing if index is empty
87 if (indices
.ssize() == 0)
91 const int largestIndex
= *std::max_element(indices
.begin(), indices
.end());
92 if (largestIndex
< largestOkayIndex
)
94 GMX_THROW(RangeError("The provided structure file only contains "
95 + std::to_string(largestOkayIndex
) + " coordinates, but coordinate index "
96 + std::to_string(largestIndex
) + " was requested for " + indexUsagePurpose
97 + ". Make sure to update structure files "
98 "and index files if you store only a part of your system."));
106 int gmx_covar(int argc
, char* argv
[])
108 const char* desc
[] = {
109 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
110 "covariance matrix.",
111 "All structures are fitted to the structure in the structure file.",
112 "When this is not a run input file periodicity will not be taken into",
113 "account. When the fit and analysis groups are identical and the analysis",
114 "is non mass-weighted, the fit will also be non mass-weighted.",
116 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
117 "When the same atoms are used for the fit and the covariance analysis,",
118 "the reference structure for the fit is written first with t=-1.",
119 "The average (or reference when [TT]-ref[tt] is used) structure is",
120 "written with t=0, the eigenvectors",
121 "are written as frames with the eigenvector number and eigenvalue",
122 "as step number and timestamp, respectively.",
124 "The eigenvectors can be analyzed with [gmx-anaeig].",
126 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
127 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
129 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [REF].xpm[ref] file.",
131 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [REF].xpm[ref] file,",
132 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
135 "Note that the diagonalization of a matrix requires memory and time",
136 "that will increase at least as fast as than the square of the number",
137 "of atoms involved. It is easy to run out of memory, in which",
138 "case this tool will probably exit with a 'Segmentation fault'. You",
139 "should consider carefully whether a reduced set of atoms will meet",
140 "your needs for lower costs."
142 static gmx_bool bFit
= TRUE
, bRef
= FALSE
, bM
= FALSE
, bPBC
= TRUE
;
145 { "-fit", FALSE
, etBOOL
, { &bFit
}, "Fit to a reference structure" },
150 "Use the deviation from the conformation in the structure file instead of from the "
152 { "-mwa", FALSE
, etBOOL
, { &bM
}, "Mass-weighted covariance analysis" },
153 { "-last", FALSE
, etINT
, { &end
}, "Last eigenvector to write away (-1 is till the last)" },
154 { "-pbc", FALSE
, etBOOL
, { &bPBC
}, "Apply corrections for periodic boundary conditions" }
156 FILE* out
= nullptr; /* initialization makes all compilers happy */
161 rvec
* x
, *xread
, *xref
, *xav
, *xproj
;
163 real
* sqrtm
, *mat
, *eigenvalues
, sum
, trace
, inv_nframes
;
164 real t
, tstart
, tend
, **mat2
;
165 real xj
, *w_rls
= nullptr;
166 real min
, max
, *axis
;
167 int natoms
, nat
, nframes0
, nframes
, nlevels
;
168 int64_t ndim
, i
, j
, k
, l
;
170 const char * fitfile
, *trxfile
, *ndxfile
;
171 const char * eigvalfile
, *eigvecfile
, *averfile
, *logfile
;
172 const char * asciifile
, *xpmfile
, *xpmafile
;
173 char str
[STRLEN
], *fitname
, *ananame
;
176 gmx_bool bDiffMass1
, bDiffMass2
;
179 gmx_output_env_t
* oenv
;
180 gmx_rmpbc_t gpbc
= nullptr;
183 { efTRX
, "-f", nullptr, ffREAD
}, { efTPS
, nullptr, nullptr, ffREAD
},
184 { efNDX
, nullptr, nullptr, ffOPTRD
}, { efXVG
, nullptr, "eigenval", ffWRITE
},
185 { efTRN
, "-v", "eigenvec", ffWRITE
}, { efSTO
, "-av", "average.pdb", ffWRITE
},
186 { efLOG
, nullptr, "covar", ffWRITE
}, { efDAT
, "-ascii", "covar", ffOPTWR
},
187 { efXPM
, "-xpm", "covar", ffOPTWR
}, { efXPM
, "-xpma", "covara", ffOPTWR
}
189 #define NFILE asize(fnm)
191 if (!parse_common_args(&argc
, argv
, PCA_CAN_TIME
| PCA_TIME_UNIT
, NFILE
, fnm
, asize(pa
), pa
,
192 asize(desc
), desc
, 0, nullptr, &oenv
))
199 fitfile
= ftp2fn(efTPS
, NFILE
, fnm
);
200 trxfile
= ftp2fn(efTRX
, NFILE
, fnm
);
201 ndxfile
= ftp2fn_null(efNDX
, NFILE
, fnm
);
202 eigvalfile
= ftp2fn(efXVG
, NFILE
, fnm
);
203 eigvecfile
= ftp2fn(efTRN
, NFILE
, fnm
);
204 averfile
= ftp2fn(efSTO
, NFILE
, fnm
);
205 logfile
= ftp2fn(efLOG
, NFILE
, fnm
);
206 asciifile
= opt2fn_null("-ascii", NFILE
, fnm
);
207 xpmfile
= opt2fn_null("-xpm", NFILE
, fnm
);
208 xpmafile
= opt2fn_null("-xpma", NFILE
, fnm
);
210 read_tps_conf(fitfile
, &top
, &pbcType
, &xref
, nullptr, box
, TRUE
);
215 printf("\nChoose a group for the least squares fit\n");
216 get_index(atoms
, ndxfile
, 1, &nfit
, &ifit
, &fitname
);
217 // Make sure that we never attempt to access a coordinate out of range
218 gmx::throwErrorIfIndexOutOfBounds({ ifit
, ifit
+ nfit
}, atoms
->nr
, "fitting");
221 gmx_fatal(FARGS
, "Need >= 3 points to fit!\n");
228 printf("\nChoose a group for the covariance analysis\n");
229 get_index(atoms
, ndxfile
, 1, &natoms
, &index
, &ananame
);
230 gmx::throwErrorIfIndexOutOfBounds({ index
, index
+ natoms
}, atoms
->nr
, "analysis");
235 snew(w_rls
, atoms
->nr
);
236 for (i
= 0; (i
< nfit
); i
++)
238 w_rls
[ifit
[i
]] = atoms
->atom
[ifit
[i
]].m
;
241 bDiffMass1
= bDiffMass1
|| (w_rls
[ifit
[i
]] != w_rls
[ifit
[i
- 1]]);
247 for (i
= 0; (i
< natoms
); i
++)
251 sqrtm
[i
] = std::sqrt(atoms
->atom
[index
[i
]].m
);
254 bDiffMass2
= bDiffMass2
|| (sqrtm
[i
] != sqrtm
[i
- 1]);
263 if (bFit
&& bDiffMass1
&& !bDiffMass2
)
265 bDiffMass1
= natoms
!= nfit
;
266 for (i
= 0; (i
< natoms
) && !bDiffMass1
; i
++)
268 bDiffMass1
= index
[i
] != ifit
[i
];
274 "Note: the fit and analysis group are identical,\n"
275 " while the fit is mass weighted and the analysis is not.\n"
276 " Making the fit non mass weighted.\n\n");
277 for (i
= 0; (i
< nfit
); i
++)
279 w_rls
[ifit
[i
]] = 1.0;
284 /* Prepare reference frame */
287 gpbc
= gmx_rmpbc_init(&top
.idef
, pbcType
, atoms
->nr
);
288 gmx_rmpbc(gpbc
, atoms
->nr
, box
, xref
);
292 reset_x(nfit
, ifit
, atoms
->nr
, nullptr, xref
, w_rls
);
298 if (std::sqrt(static_cast<real
>(INT64_MAX
)) < static_cast<real
>(ndim
))
300 gmx_fatal(FARGS
, "Number of degrees of freedoms to large for matrix.\n");
302 snew(mat
, ndim
* ndim
);
304 fprintf(stderr
, "Calculating the average structure ...\n");
306 nat
= read_first_x(oenv
, &status
, trxfile
, &t
, &xread
, box
);
307 if (nat
!= atoms
->nr
)
310 "\nWARNING: number of atoms in structure file (%d) and trajectory (%d) do not "
314 gmx::throwErrorIfIndexOutOfBounds({ ifit
, ifit
+ nfit
}, nat
, "fitting");
315 gmx::throwErrorIfIndexOutOfBounds({ index
, index
+ natoms
}, nat
, "analysis");
320 /* calculate x: a fitted struture of the selected atoms */
323 gmx_rmpbc(gpbc
, nat
, box
, xread
);
327 reset_x(nfit
, ifit
, nat
, nullptr, xread
, w_rls
);
328 do_fit(nat
, w_rls
, xref
, xread
);
330 for (i
= 0; i
< natoms
; i
++)
332 rvec_inc(xav
[i
], xread
[index
[i
]]);
334 } while (read_next_x(oenv
, status
, &t
, xread
, box
));
337 inv_nframes
= 1.0 / nframes0
;
338 for (i
= 0; i
< natoms
; i
++)
340 for (d
= 0; d
< DIM
; d
++)
342 xav
[i
][d
] *= inv_nframes
;
343 xread
[index
[i
]][d
] = xav
[i
][d
];
346 write_sto_conf_indexed(opt2fn("-av", NFILE
, fnm
), "Average structure", atoms
, xread
, nullptr,
347 PbcType::No
, zerobox
, natoms
, index
);
350 fprintf(stderr
, "Constructing covariance matrix (%dx%d) ...\n", static_cast<int>(ndim
),
351 static_cast<int>(ndim
));
353 nat
= read_first_x(oenv
, &status
, trxfile
, &t
, &xread
, box
);
359 /* calculate x: a (fitted) structure of the selected atoms */
362 gmx_rmpbc(gpbc
, nat
, box
, xread
);
366 reset_x(nfit
, ifit
, nat
, nullptr, xread
, w_rls
);
367 do_fit(nat
, w_rls
, xref
, xread
);
371 for (i
= 0; i
< natoms
; i
++)
373 rvec_sub(xread
[index
[i
]], xref
[index
[i
]], x
[i
]);
378 for (i
= 0; i
< natoms
; i
++)
380 rvec_sub(xread
[index
[i
]], xav
[i
], x
[i
]);
384 for (j
= 0; j
< natoms
; j
++)
386 for (dj
= 0; dj
< DIM
; dj
++)
388 k
= ndim
* (DIM
* j
+ dj
);
390 for (i
= j
; i
< natoms
; i
++)
393 for (d
= 0; d
< DIM
; d
++)
395 mat
[l
+ d
] += x
[i
][d
] * xj
;
400 } while (read_next_x(oenv
, status
, &t
, xread
, box
) && (bRef
|| nframes
< nframes0
));
402 gmx_rmpbc_done(gpbc
);
404 fprintf(stderr
, "Read %d frames\n", nframes
);
408 /* copy the reference structure to the ouput array x */
410 for (i
= 0; i
< natoms
; i
++)
412 copy_rvec(xref
[index
[i
]], xproj
[i
]);
420 /* correct the covariance matrix for the mass */
421 inv_nframes
= 1.0 / nframes
;
422 for (j
= 0; j
< natoms
; j
++)
424 for (dj
= 0; dj
< DIM
; dj
++)
426 for (i
= j
; i
< natoms
; i
++)
428 k
= ndim
* (DIM
* j
+ dj
) + DIM
* i
;
429 for (d
= 0; d
< DIM
; d
++)
431 mat
[k
+ d
] = mat
[k
+ d
] * inv_nframes
* sqrtm
[i
] * sqrtm
[j
];
437 /* symmetrize the matrix */
438 for (j
= 0; j
< ndim
; j
++)
440 for (i
= j
; i
< ndim
; i
++)
442 mat
[ndim
* i
+ j
] = mat
[ndim
* j
+ i
];
447 for (i
= 0; i
< ndim
; i
++)
449 trace
+= mat
[i
* ndim
+ i
];
451 fprintf(stderr
, "\nTrace of the covariance matrix: %g (%snm^2)\n", trace
, bM
? "u " : "");
455 out
= gmx_ffopen(asciifile
, "w");
456 for (j
= 0; j
< ndim
; j
++)
458 for (i
= 0; i
< ndim
; i
+= 3)
460 fprintf(out
, "%g %g %g\n", mat
[ndim
* j
+ i
], mat
[ndim
* j
+ i
+ 1],
461 mat
[ndim
* j
+ i
+ 2]);
472 for (j
= 0; j
< ndim
; j
++)
474 mat2
[j
] = &(mat
[ndim
* j
]);
475 for (i
= 0; i
<= j
; i
++)
477 if (mat2
[j
][i
] < min
)
481 if (mat2
[j
][j
] > max
)
488 for (i
= 0; i
< ndim
; i
++)
501 out
= gmx_ffopen(xpmfile
, "w");
503 write_xpm3(out
, 0, "Covariance", bM
? "u nm^2" : "nm^2", "dim", "dim", ndim
, ndim
, axis
,
504 axis
, mat2
, min
, 0.0, max
, rlo
, rmi
, rhi
, &nlevels
);
514 snew(mat2
, ndim
/ DIM
);
515 for (i
= 0; i
< ndim
/ DIM
; i
++)
517 snew(mat2
[i
], ndim
/ DIM
);
519 for (j
= 0; j
< ndim
/ DIM
; j
++)
521 for (i
= 0; i
<= j
; i
++)
524 for (d
= 0; d
< DIM
; d
++)
526 mat2
[j
][i
] += mat
[ndim
* (DIM
* j
+ d
) + DIM
* i
+ d
];
528 if (mat2
[j
][i
] < min
)
532 if (mat2
[j
][j
] > max
)
536 mat2
[i
][j
] = mat2
[j
][i
];
539 snew(axis
, ndim
/ DIM
);
540 for (i
= 0; i
< ndim
/ DIM
; i
++)
553 out
= gmx_ffopen(xpmafile
, "w");
555 write_xpm3(out
, 0, "Covariance", bM
? "u nm^2" : "nm^2", "atom", "atom", ndim
/ DIM
,
556 ndim
/ DIM
, axis
, axis
, mat2
, min
, 0.0, max
, rlo
, rmi
, rhi
, &nlevels
);
559 for (i
= 0; i
< ndim
/ DIM
; i
++)
567 /* call diagonalization routine */
569 snew(eigenvalues
, ndim
);
570 snew(eigenvectors
, ndim
* ndim
);
572 std::memcpy(eigenvectors
, mat
, ndim
* ndim
* sizeof(real
));
573 fprintf(stderr
, "\nDiagonalizing ...\n");
575 eigensolver(eigenvectors
, ndim
, 0, ndim
, eigenvalues
, mat
);
578 /* now write the output */
581 for (i
= 0; i
< ndim
; i
++)
583 sum
+= eigenvalues
[i
];
585 fprintf(stderr
, "\nSum of the eigenvalues: %g (%snm^2)\n", sum
, bM
? "u " : "");
586 if (std::abs(trace
- sum
) > 0.01 * trace
)
589 "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
592 /* Set 'end', the maximum eigenvector and -value index used for output */
595 if (nframes
- 1 < ndim
)
599 "\nWARNING: there are fewer frames in your trajectory than there are\n");
600 fprintf(stderr
, "degrees of freedom in your system. Only generating the first\n");
601 fprintf(stderr
, "%d out of %d eigenvectors and eigenvalues.\n", end
, static_cast<int>(ndim
));
609 fprintf(stderr
, "\nWriting eigenvalues to %s\n", eigvalfile
);
611 sprintf(str
, "(%snm\\S2\\N)", bM
? "u " : "");
612 out
= xvgropen(eigvalfile
, "Eigenvalues of the covariance matrix", "Eigenvector index", str
, oenv
);
613 for (i
= 0; (i
< end
); i
++)
615 fprintf(out
, "%10d %g\n", static_cast<int>(i
+ 1), eigenvalues
[ndim
- 1 - i
]);
621 /* misuse lambda: 0/1 mass weighted analysis no/yes */
624 WriteXref
= eWXR_YES
;
625 for (i
= 0; i
< nfit
; i
++)
627 copy_rvec(xref
[ifit
[i
]], x
[i
]);
637 /* misuse lambda: -1 for no fit */
638 WriteXref
= eWXR_NOFIT
;
641 write_eigenvectors(eigvecfile
, natoms
, mat
, TRUE
, 1, end
, WriteXref
, x
, bDiffMass1
, xproj
, bM
,
644 out
= gmx_ffopen(logfile
, "w");
646 fprintf(out
, "Covariance analysis log, written %s\n", gmx_format_current_time().c_str());
648 fprintf(out
, "Program: %s\n", argv
[0]);
649 gmx_getcwd(str
, STRLEN
);
651 fprintf(out
, "Working directory: %s\n\n", str
);
653 fprintf(out
, "Read %d frames from %s (time %g to %g %s)\n", nframes
, trxfile
,
654 output_env_conv_time(oenv
, tstart
), output_env_conv_time(oenv
, tend
),
655 output_env_get_time_unit(oenv
).c_str());
658 fprintf(out
, "Read reference structure for fit from %s\n", fitfile
);
662 fprintf(out
, "Read index groups from %s\n", ndxfile
);
666 fprintf(out
, "Analysis group is '%s' (%d atoms)\n", ananame
, natoms
);
669 fprintf(out
, "Fit group is '%s' (%d atoms)\n", fitname
, nfit
);
673 fprintf(out
, "No fit was used\n");
675 fprintf(out
, "Analysis is %smass weighted\n", bDiffMass2
? "" : "non-");
678 fprintf(out
, "Fit is %smass weighted\n", bDiffMass1
? "" : "non-");
680 fprintf(out
, "Diagonalized the %dx%d covariance matrix\n", static_cast<int>(ndim
),
681 static_cast<int>(ndim
));
682 fprintf(out
, "Trace of the covariance matrix before diagonalizing: %g\n", trace
);
683 fprintf(out
, "Trace of the covariance matrix after diagonalizing: %g\n\n", sum
);
685 fprintf(out
, "Wrote %d eigenvalues to %s\n", static_cast<int>(end
), eigvalfile
);
686 if (WriteXref
== eWXR_YES
)
688 fprintf(out
, "Wrote reference structure to %s\n", eigvecfile
);
690 fprintf(out
, "Wrote average structure to %s and %s\n", averfile
, eigvecfile
);
691 fprintf(out
, "Wrote eigenvectors %d to %d to %s\n", 1, end
, eigvecfile
);
695 fprintf(stderr
, "Wrote the log to %s\n", logfile
);