Introduce SimulatorBuilder
[gromacs.git] / src / gromacs / gmxana / gmx_cluster.cpp
blob9fe99f540fc317ce724bda6e52d7b81fc79316d4
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include <cmath>
40 #include <cstdlib>
41 #include <cstring>
43 #include <algorithm>
45 #include "gromacs/commandline/pargs.h"
46 #include "gromacs/commandline/viewit.h"
47 #include "gromacs/fileio/confio.h"
48 #include "gromacs/fileio/matio.h"
49 #include "gromacs/fileio/trxio.h"
50 #include "gromacs/fileio/xvgr.h"
51 #include "gromacs/gmxana/cmat.h"
52 #include "gromacs/gmxana/gmx_ana.h"
53 #include "gromacs/linearalgebra/eigensolver.h"
54 #include "gromacs/math/do_fit.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/pbcutil/pbc.h"
57 #include "gromacs/pbcutil/rmpbc.h"
58 #include "gromacs/random/threefry.h"
59 #include "gromacs/random/uniformintdistribution.h"
60 #include "gromacs/random/uniformrealdistribution.h"
61 #include "gromacs/topology/index.h"
62 #include "gromacs/topology/topology.h"
63 #include "gromacs/utility/arraysize.h"
64 #include "gromacs/utility/cstringutil.h"
65 #include "gromacs/utility/fatalerror.h"
66 #include "gromacs/utility/futil.h"
67 #include "gromacs/utility/smalloc.h"
68 #include "gromacs/utility/stringutil.h"
70 /* print to two file pointers at once (i.e. stderr and log) */
71 static inline
72 void lo_ffprintf(FILE *fp1, FILE *fp2, const char *buf)
74 fprintf(fp1, "%s", buf);
75 fprintf(fp2, "%s", buf);
78 /* just print a prepared buffer to fp1 and fp2 */
79 static inline
80 void ffprintf(FILE *fp1, FILE *fp2, const char *buf)
82 lo_ffprintf(fp1, fp2, buf);
85 /* prepare buffer with one argument, then print to fp1 and fp2 */
86 static inline
87 void ffprintf_d(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg)
89 sprintf(buf, fmt, arg);
90 lo_ffprintf(fp1, fp2, buf);
93 /* prepare buffer with one argument, then print to fp1 and fp2 */
94 static inline
95 void ffprintf_g(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg)
97 sprintf(buf, fmt, arg);
98 lo_ffprintf(fp1, fp2, buf);
101 /* prepare buffer with one argument, then print to fp1 and fp2 */
102 static inline
103 void ffprintf_s(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg)
105 sprintf(buf, fmt, arg);
106 lo_ffprintf(fp1, fp2, buf);
109 /* prepare buffer with two arguments, then print to fp1 and fp2 */
110 static inline
111 void ffprintf_dd(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg1, int arg2)
113 sprintf(buf, fmt, arg1, arg2);
114 lo_ffprintf(fp1, fp2, buf);
117 /* prepare buffer with two arguments, then print to fp1 and fp2 */
118 static inline
119 void ffprintf_gg(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg1, real arg2)
121 sprintf(buf, fmt, arg1, arg2);
122 lo_ffprintf(fp1, fp2, buf);
125 /* prepare buffer with two arguments, then print to fp1 and fp2 */
126 static inline
127 void ffprintf_ss(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg1, const char *arg2)
129 sprintf(buf, fmt, arg1, arg2);
130 lo_ffprintf(fp1, fp2, buf);
133 typedef struct {
134 int ncl;
135 int *cl;
136 } t_clusters;
138 typedef struct {
139 int nr;
140 int *nb;
141 } t_nnb;
143 static void mc_optimize(FILE *log, t_mat *m, real *time,
144 int maxiter, int nrandom,
145 int seed, real kT,
146 const char *conv, gmx_output_env_t *oenv)
148 FILE *fp = nullptr;
149 real ecur, enext, emin, prob, enorm;
150 int i, j, iswap, jswap, nn, nuphill = 0;
151 t_mat *minimum;
153 if (seed == 0)
155 seed = static_cast<int>(gmx::makeRandomSeed());
157 gmx::DefaultRandomEngine rng(seed);
159 if (m->n1 != m->nn)
161 fprintf(stderr, "Can not do Monte Carlo optimization with a non-square matrix.\n");
162 return;
164 printf("\nDoing Monte Carlo optimization to find the smoothest trajectory\n");
165 printf("by reordering the frames to minimize the path between the two structures\n");
166 printf("that have the largest pairwise RMSD.\n");
167 printf("Using random seed %d.\n", seed);
169 iswap = jswap = -1;
170 enorm = m->mat[0][0];
171 for (i = 0; (i < m->n1); i++)
173 for (j = 0; (j < m->nn); j++)
175 if (m->mat[i][j] > enorm)
177 enorm = m->mat[i][j];
178 iswap = i;
179 jswap = j;
183 if ((iswap == -1) || (jswap == -1))
185 fprintf(stderr, "Matrix contains identical values in all fields\n");
186 return;
188 swap_rows(m, 0, iswap);
189 swap_rows(m, m->n1-1, jswap);
190 emin = ecur = mat_energy(m);
191 printf("Largest distance %g between %d and %d. Energy: %g.\n",
192 enorm, iswap, jswap, emin);
194 nn = m->nn;
196 /* Initiate and store global minimum */
197 minimum = init_mat(nn, m->b1D);
198 minimum->nn = nn;
199 copy_t_mat(minimum, m);
201 if (nullptr != conv)
203 fp = xvgropen(conv, "Convergence of the MC optimization",
204 "Energy", "Step", oenv);
207 gmx::UniformIntDistribution<int> intDistNN(1, nn-2); // [1,nn-2]
208 gmx::UniformRealDistribution<real> realDistOne; // [0,1)
210 for (i = 0; (i < maxiter); i++)
212 /* Generate new swapping candidates */
215 iswap = intDistNN(rng);
216 jswap = intDistNN(rng);
218 while ((iswap == jswap) || (iswap >= nn-1) || (jswap >= nn-1));
220 /* Apply swap and compute energy */
221 swap_rows(m, iswap, jswap);
222 enext = mat_energy(m);
224 /* Compute probability */
225 prob = 0;
226 if ((enext < ecur) || (i < nrandom))
228 prob = 1;
229 if (enext < emin)
231 /* Store global minimum */
232 copy_t_mat(minimum, m);
233 emin = enext;
236 else if (kT > 0)
238 /* Try Monte Carlo step */
239 prob = std::exp(-(enext-ecur)/(enorm*kT));
242 if (prob == 1 || realDistOne(rng) < prob)
244 if (enext > ecur)
246 nuphill++;
249 fprintf(log, "Iter: %d Swapped %4d and %4d (energy: %g prob: %g)\n",
250 i, iswap, jswap, enext, prob);
251 if (nullptr != fp)
253 fprintf(fp, "%6d %10g\n", i, enext);
255 ecur = enext;
257 else
259 swap_rows(m, jswap, iswap);
262 fprintf(log, "%d uphill steps were taken during optimization\n",
263 nuphill);
265 /* Now swap the matrix to get it into global minimum mode */
266 copy_t_mat(m, minimum);
268 fprintf(log, "Global minimum energy %g\n", mat_energy(minimum));
269 fprintf(log, "Global minimum energy %g\n", mat_energy(m));
270 fprintf(log, "Swapped time and frame indices and RMSD to next neighbor:\n");
271 for (i = 0; (i < m->nn); i++)
273 fprintf(log, "%10g %5d %10g\n",
274 time[m->m_ind[i]],
275 m->m_ind[i],
276 (i < m->nn-1) ? m->mat[m->m_ind[i]][m->m_ind[i+1]] : 0);
279 if (nullptr != fp)
281 xvgrclose(fp);
285 static void calc_dist(int nind, rvec x[], real **d)
287 int i, j;
288 real *xi;
289 rvec dx;
291 for (i = 0; (i < nind-1); i++)
293 xi = x[i];
294 for (j = i+1; (j < nind); j++)
296 /* Should use pbc_dx when analysing multiple molecueles,
297 * but the box is not stored for every frame.
299 rvec_sub(xi, x[j], dx);
300 d[i][j] = norm(dx);
305 static real rms_dist(int isize, real **d, real **d_r)
307 int i, j;
308 real r, r2;
310 r2 = 0.0;
311 for (i = 0; (i < isize-1); i++)
313 for (j = i+1; (j < isize); j++)
315 r = d[i][j]-d_r[i][j];
316 r2 += r*r;
319 r2 /= gmx::exactDiv(isize*(isize-1), 2);
321 return std::sqrt(r2);
324 static bool rms_dist_comp(const t_dist &a, const t_dist &b)
326 return a.dist < b.dist;
329 static bool clust_id_comp(const t_clustid &a, const t_clustid &b)
331 return a.clust < b.clust;
334 static bool nrnb_comp(const t_nnb &a, const t_nnb &b)
336 /* return b<a, we want highest first */
337 return b.nr < a.nr;
340 static void gather(t_mat *m, real cutoff, t_clusters *clust)
342 t_clustid *c;
343 t_dist *d;
344 int i, j, k, nn, cid, n1, diff;
345 gmx_bool bChange;
347 /* First we sort the entries in the RMSD matrix */
348 n1 = m->nn;
349 nn = ((n1-1)*n1)/2;
350 snew(d, nn);
351 for (i = k = 0; (i < n1); i++)
353 for (j = i+1; (j < n1); j++, k++)
355 d[k].i = i;
356 d[k].j = j;
357 d[k].dist = m->mat[i][j];
360 if (k != nn)
362 gmx_incons("gather algortihm");
364 std::sort(d, d+nn, rms_dist_comp);
366 /* Now we make a cluster index for all of the conformations */
367 c = new_clustid(n1);
369 /* Now we check the closest structures, and equalize their cluster numbers */
370 fprintf(stderr, "Linking structures ");
373 fprintf(stderr, "*");
374 bChange = FALSE;
375 for (k = 0; (k < nn) && (d[k].dist < cutoff); k++)
377 diff = c[d[k].j].clust - c[d[k].i].clust;
378 if (diff)
380 bChange = TRUE;
381 if (diff > 0)
383 c[d[k].j].clust = c[d[k].i].clust;
385 else
387 c[d[k].i].clust = c[d[k].j].clust;
392 while (bChange);
393 fprintf(stderr, "\nSorting and renumbering clusters\n");
394 /* Sort on cluster number */
395 std::sort(c, c+n1, clust_id_comp);
397 /* Renumber clusters */
398 cid = 1;
399 for (k = 1; k < n1; k++)
401 if (c[k].clust != c[k-1].clust)
403 c[k-1].clust = cid;
404 cid++;
406 else
408 c[k-1].clust = cid;
411 c[k-1].clust = cid;
412 if (debug)
414 for (k = 0; (k < n1); k++)
416 fprintf(debug, "Cluster index for conformation %d: %d\n",
417 c[k].conf, c[k].clust);
420 clust->ncl = cid;
421 for (k = 0; k < n1; k++)
423 clust->cl[c[k].conf] = c[k].clust;
426 sfree(c);
427 sfree(d);
430 static gmx_bool jp_same(int **nnb, int i, int j, int P)
432 gmx_bool bIn;
433 int k, ii, jj, pp;
435 bIn = FALSE;
436 for (k = 0; nnb[i][k] >= 0; k++)
438 bIn = bIn || (nnb[i][k] == j);
440 if (!bIn)
442 return FALSE;
445 bIn = FALSE;
446 for (k = 0; nnb[j][k] >= 0; k++)
448 bIn = bIn || (nnb[j][k] == i);
450 if (!bIn)
452 return FALSE;
455 pp = 0;
456 for (ii = 0; nnb[i][ii] >= 0; ii++)
458 for (jj = 0; nnb[j][jj] >= 0; jj++)
460 if ((nnb[i][ii] == nnb[j][jj]) && (nnb[i][ii] != -1))
462 pp++;
467 return (pp >= P);
470 static void jarvis_patrick(int n1, real **mat, int M, int P,
471 real rmsdcut, t_clusters *clust)
473 t_dist *row;
474 t_clustid *c;
475 int **nnb;
476 int i, j, k, cid, diff, maxval;
477 gmx_bool bChange;
478 real **mcpy = nullptr;
480 if (rmsdcut < 0)
482 rmsdcut = 10000;
485 /* First we sort the entries in the RMSD matrix row by row.
486 * This gives us the nearest neighbor list.
488 snew(nnb, n1);
489 snew(row, n1);
490 for (i = 0; (i < n1); i++)
492 for (j = 0; (j < n1); j++)
494 row[j].j = j;
495 row[j].dist = mat[i][j];
497 std::sort(row, row+n1, rms_dist_comp);
498 if (M > 0)
500 /* Put the M nearest neighbors in the list */
501 snew(nnb[i], M+1);
502 for (j = k = 0; (k < M) && (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
504 if (row[j].j != i)
506 nnb[i][k] = row[j].j;
507 k++;
510 nnb[i][k] = -1;
512 else
514 /* Put all neighbors nearer than rmsdcut in the list */
515 maxval = 0;
516 k = 0;
517 for (j = 0; (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
519 if (row[j].j != i)
521 if (k >= maxval)
523 maxval += 10;
524 srenew(nnb[i], maxval);
526 nnb[i][k] = row[j].j;
527 k++;
530 if (k == maxval)
532 srenew(nnb[i], maxval+1);
534 nnb[i][k] = -1;
537 sfree(row);
538 if (debug)
540 fprintf(debug, "Nearest neighborlist. M = %d, P = %d\n", M, P);
541 for (i = 0; (i < n1); i++)
543 fprintf(debug, "i:%5d nbs:", i);
544 for (j = 0; nnb[i][j] >= 0; j++)
546 fprintf(debug, "%5d[%5.3f]", nnb[i][j], mat[i][nnb[i][j]]);
548 fprintf(debug, "\n");
552 c = new_clustid(n1);
553 fprintf(stderr, "Linking structures ");
554 /* Use mcpy for temporary storage of booleans */
555 mcpy = mk_matrix(n1, n1, FALSE);
556 for (i = 0; i < n1; i++)
558 for (j = i+1; j < n1; j++)
560 mcpy[i][j] = static_cast<real>(jp_same(nnb, i, j, P));
565 fprintf(stderr, "*");
566 bChange = FALSE;
567 for (i = 0; i < n1; i++)
569 for (j = i+1; j < n1; j++)
571 if (mcpy[i][j] != 0.0f)
573 diff = c[j].clust - c[i].clust;
574 if (diff)
576 bChange = TRUE;
577 if (diff > 0)
579 c[j].clust = c[i].clust;
581 else
583 c[i].clust = c[j].clust;
590 while (bChange);
592 fprintf(stderr, "\nSorting and renumbering clusters\n");
593 /* Sort on cluster number */
594 std::sort(c, c+n1, clust_id_comp);
596 /* Renumber clusters */
597 cid = 1;
598 for (k = 1; k < n1; k++)
600 if (c[k].clust != c[k-1].clust)
602 c[k-1].clust = cid;
603 cid++;
605 else
607 c[k-1].clust = cid;
610 c[k-1].clust = cid;
611 clust->ncl = cid;
612 for (k = 0; k < n1; k++)
614 clust->cl[c[k].conf] = c[k].clust;
616 if (debug)
618 for (k = 0; (k < n1); k++)
620 fprintf(debug, "Cluster index for conformation %d: %d\n",
621 c[k].conf, c[k].clust);
625 done_matrix(n1, &mcpy);
627 sfree(c);
628 for (i = 0; (i < n1); i++)
630 sfree(nnb[i]);
632 sfree(nnb);
635 static void dump_nnb (FILE *fp, const char *title, int n1, t_nnb *nnb)
637 int i, j;
639 /* dump neighbor list */
640 fprintf(fp, "%s", title);
641 for (i = 0; (i < n1); i++)
643 fprintf(fp, "i:%5d #:%5d nbs:", i, nnb[i].nr);
644 for (j = 0; j < nnb[i].nr; j++)
646 fprintf(fp, "%5d", nnb[i].nb[j]);
648 fprintf(fp, "\n");
652 static void gromos(int n1, real **mat, real rmsdcut, t_clusters *clust)
654 t_nnb *nnb;
655 int i, j, k, j1, maxval;
657 /* Put all neighbors nearer than rmsdcut in the list */
658 fprintf(stderr, "Making list of neighbors within cutoff ");
659 snew(nnb, n1);
660 for (i = 0; (i < n1); i++)
662 maxval = 0;
663 k = 0;
664 /* put all neighbors within cut-off in list */
665 for (j = 0; j < n1; j++)
667 if (mat[i][j] < rmsdcut)
669 if (k >= maxval)
671 maxval += 10;
672 srenew(nnb[i].nb, maxval);
674 nnb[i].nb[k] = j;
675 k++;
678 /* store nr of neighbors, we'll need that */
679 nnb[i].nr = k;
680 if (i%(1+n1/100) == 0)
682 fprintf(stderr, "%3d%%\b\b\b\b", (i*100+1)/n1);
685 fprintf(stderr, "%3d%%\n", 100);
687 /* sort neighbor list on number of neighbors, largest first */
688 std::sort(nnb, nnb+n1, nrnb_comp);
690 if (debug)
692 dump_nnb(debug, "Nearest neighborlist after sort.\n", n1, nnb);
695 /* turn first structure with all its neighbors (largest) into cluster
696 remove them from pool of structures and repeat for all remaining */
697 fprintf(stderr, "Finding clusters %4d", 0);
698 /* cluster id's start at 1: */
699 k = 1;
700 while (nnb[0].nr)
702 /* set cluster id (k) for first item in neighborlist */
703 for (j = 0; j < nnb[0].nr; j++)
705 clust->cl[nnb[0].nb[j]] = k;
707 /* mark as done */
708 nnb[0].nr = 0;
709 sfree(nnb[0].nb);
711 /* adjust number of neighbors for others, taking removals into account: */
712 for (i = 1; i < n1 && nnb[i].nr; i++)
714 j1 = 0;
715 for (j = 0; j < nnb[i].nr; j++)
717 /* if this neighbor wasn't removed */
718 if (clust->cl[nnb[i].nb[j]] == 0)
720 /* shift the rest (j1<=j) */
721 nnb[i].nb[j1] = nnb[i].nb[j];
722 /* next */
723 j1++;
726 /* now j1 is the new number of neighbors */
727 nnb[i].nr = j1;
729 /* sort again on nnb[].nr, because we have new # neighbors: */
730 /* but we only need to sort upto i, i.e. when nnb[].nr>0 */
731 std::sort(nnb, nnb+i, nrnb_comp);
733 fprintf(stderr, "\b\b\b\b%4d", k);
734 /* new cluster id */
735 k++;
737 fprintf(stderr, "\n");
738 sfree(nnb);
739 if (debug)
741 fprintf(debug, "Clusters (%d):\n", k);
742 for (i = 0; i < n1; i++)
744 fprintf(debug, " %3d", clust->cl[i]);
746 fprintf(debug, "\n");
749 clust->ncl = k-1;
752 static rvec **read_whole_trj(const char *fn, int isize, const int index[], int skip,
753 int *nframe, real **time, matrix **boxes, int **frameindices, const gmx_output_env_t *oenv, gmx_bool bPBC, gmx_rmpbc_t gpbc)
755 rvec **xx, *x;
756 matrix box;
757 real t;
758 int i, j, max_nf;
759 int natom;
760 t_trxstatus *status;
763 max_nf = 0;
764 xx = nullptr;
765 *time = nullptr;
766 natom = read_first_x(oenv, &status, fn, &t, &x, box);
767 i = 0;
768 int clusterIndex = 0;
771 if (bPBC)
773 gmx_rmpbc(gpbc, natom, box, x);
775 if (clusterIndex >= max_nf)
777 max_nf += 10;
778 srenew(xx, max_nf);
779 srenew(*time, max_nf);
780 srenew(*boxes, max_nf);
781 srenew(*frameindices, max_nf);
783 if ((i % skip) == 0)
785 snew(xx[clusterIndex], isize);
786 /* Store only the interesting atoms */
787 for (j = 0; (j < isize); j++)
789 copy_rvec(x[index[j]], xx[clusterIndex][j]);
791 (*time)[clusterIndex] = t;
792 copy_mat(box, (*boxes)[clusterIndex]);
793 (*frameindices)[clusterIndex] = nframes_read(status);
794 clusterIndex++;
796 i++;
798 while (read_next_x(oenv, status, &t, x, box));
799 fprintf(stderr, "Allocated %zu bytes for frames\n",
800 (max_nf*isize*sizeof(**xx)));
801 fprintf(stderr, "Read %d frames from trajectory %s\n", clusterIndex, fn);
802 *nframe = clusterIndex;
803 sfree(x);
805 return xx;
808 static int plot_clusters(int nf, real **mat, t_clusters *clust,
809 int minstruct)
811 int i, j, ncluster, ci;
812 int *cl_id, *nstruct, *strind;
814 snew(cl_id, nf);
815 snew(nstruct, nf);
816 snew(strind, nf);
817 for (i = 0; i < nf; i++)
819 strind[i] = 0;
820 cl_id[i] = clust->cl[i];
821 nstruct[cl_id[i]]++;
823 ncluster = 0;
824 for (i = 0; i < nf; i++)
826 if (nstruct[i] >= minstruct)
828 ncluster++;
829 for (j = 0; (j < nf); j++)
831 if (cl_id[j] == i)
833 strind[j] = ncluster;
838 ncluster++;
839 fprintf(stderr, "There are %d clusters with at least %d conformations\n",
840 ncluster, minstruct);
842 for (i = 0; (i < nf); i++)
844 ci = cl_id[i];
845 for (j = 0; j < i; j++)
847 if ((ci == cl_id[j]) && (nstruct[ci] >= minstruct))
849 /* color different clusters with different colors, as long as
850 we don't run out of colors */
851 mat[i][j] = strind[i];
853 else
855 mat[i][j] = 0;
859 sfree(strind);
860 sfree(nstruct);
861 sfree(cl_id);
863 return ncluster;
866 static void mark_clusters(int nf, real **mat, real val, t_clusters *clust)
868 int i, j;
870 for (i = 0; i < nf; i++)
872 for (j = 0; j < i; j++)
874 if (clust->cl[i] == clust->cl[j])
876 mat[i][j] = val;
878 else
880 mat[i][j] = 0;
886 static char *parse_filename(const char *fn, int maxnr)
888 int i;
889 char *fnout;
890 const char *ext;
891 char buf[STRLEN];
893 if (std::strchr(fn, '%'))
895 gmx_fatal(FARGS, "will not number filename %s containing '%c'", fn, '%');
897 /* number of digits needed in numbering */
898 i = static_cast<int>((std::log(static_cast<real>(maxnr))/std::log(10.0)) + 1);
899 /* split fn and ext */
900 ext = std::strrchr(fn, '.');
901 if (!ext)
903 gmx_fatal(FARGS, "cannot separate extension in filename %s", fn);
905 ext++;
906 /* insert e.g. '%03d' between fn and ext */
907 sprintf(buf, "%s%%0%dd.%s", fn, i, ext);
908 snew(fnout, std::strlen(buf)+1);
909 std::strcpy(fnout, buf);
911 return fnout;
914 static void ana_trans(t_clusters *clust, int nf,
915 const char *transfn, const char *ntransfn, FILE *log,
916 t_rgb rlo, t_rgb rhi, const gmx_output_env_t *oenv)
918 FILE *fp;
919 real **trans, *axis;
920 int *ntrans;
921 int i, ntranst, maxtrans;
922 char buf[STRLEN];
924 snew(ntrans, clust->ncl);
925 snew(trans, clust->ncl);
926 snew(axis, clust->ncl);
927 for (i = 0; i < clust->ncl; i++)
929 axis[i] = i+1;
930 snew(trans[i], clust->ncl);
932 ntranst = 0;
933 maxtrans = 0;
934 for (i = 1; i < nf; i++)
936 if (clust->cl[i] != clust->cl[i-1])
938 ntranst++;
939 ntrans[clust->cl[i-1]-1]++;
940 ntrans[clust->cl[i]-1]++;
941 trans[clust->cl[i-1]-1][clust->cl[i]-1]++;
942 maxtrans = static_cast<int>(std::max(static_cast<real>(maxtrans), trans[clust->cl[i]-1][clust->cl[i-1]-1]));
945 ffprintf_dd(stderr, log, buf, "Counted %d transitions in total, "
946 "max %d between two specific clusters\n", ntranst, maxtrans);
947 if (transfn)
949 fp = gmx_ffopen(transfn, "w");
950 i = std::min(maxtrans+1, 80);
951 write_xpm(fp, 0, "Cluster Transitions", "# transitions",
952 "from cluster", "to cluster",
953 clust->ncl, clust->ncl, axis, axis, trans,
954 0, maxtrans, rlo, rhi, &i);
955 gmx_ffclose(fp);
957 if (ntransfn)
959 fp = xvgropen(ntransfn, "Cluster Transitions", "Cluster #", "# transitions",
960 oenv);
961 for (i = 0; i < clust->ncl; i++)
963 fprintf(fp, "%5d %5d\n", i+1, ntrans[i]);
965 xvgrclose(fp);
967 sfree(ntrans);
968 for (i = 0; i < clust->ncl; i++)
970 sfree(trans[i]);
972 sfree(trans);
973 sfree(axis);
976 static void analyze_clusters(int nf, t_clusters *clust, real **rmsd,
977 int natom, t_atoms *atoms, rvec *xtps,
978 real *mass, rvec **xx, real *time,
979 matrix *boxes, int *frameindices,
980 int ifsize, int *fitidx,
981 int iosize, int *outidx,
982 const char *trxfn, const char *sizefn,
983 const char *transfn, const char *ntransfn,
984 const char *clustidfn, const char *clustndxfn, gmx_bool bAverage,
985 int write_ncl, int write_nst, real rmsmin,
986 gmx_bool bFit, FILE *log, t_rgb rlo, t_rgb rhi,
987 const gmx_output_env_t *oenv)
989 FILE *size_fp = nullptr;
990 FILE *ndxfn = nullptr;
991 char buf[STRLEN], buf1[40], buf2[40], buf3[40], *trxsfn;
992 t_trxstatus *trxout = nullptr;
993 t_trxstatus *trxsout = nullptr;
994 int i, i1, cl, nstr, *structure, first = 0, midstr;
995 gmx_bool *bWrite = nullptr;
996 real r, clrmsd, midrmsd;
997 rvec *xav = nullptr;
998 matrix zerobox;
1000 clear_mat(zerobox);
1002 ffprintf_d(stderr, log, buf, "\nFound %d clusters\n\n", clust->ncl);
1003 trxsfn = nullptr;
1004 if (trxfn)
1006 /* do we write all structures? */
1007 if (write_ncl)
1009 trxsfn = parse_filename(trxfn, std::max(write_ncl, clust->ncl));
1010 snew(bWrite, nf);
1012 ffprintf_ss(stderr, log, buf, "Writing %s structure for each cluster to %s\n",
1013 bAverage ? "average" : "middle", trxfn);
1014 if (write_ncl)
1016 /* find out what we want to tell the user:
1017 Writing [all structures|structures with rmsd > %g] for
1018 {all|first %d} clusters {with more than %d structures} to %s */
1019 if (rmsmin > 0.0)
1021 sprintf(buf1, "structures with rmsd > %g", rmsmin);
1023 else
1025 sprintf(buf1, "all structures");
1027 buf2[0] = buf3[0] = '\0';
1028 if (write_ncl >= clust->ncl)
1030 if (write_nst == 0)
1032 sprintf(buf2, "all ");
1035 else
1037 sprintf(buf2, "the first %d ", write_ncl);
1039 if (write_nst)
1041 sprintf(buf3, " with more than %d structures", write_nst);
1043 sprintf(buf, "Writing %s for %sclusters%s to %s\n", buf1, buf2, buf3, trxsfn);
1044 ffprintf(stderr, log, buf);
1047 /* Prepare a reference structure for the orientation of the clusters */
1048 if (bFit)
1050 reset_x(ifsize, fitidx, natom, nullptr, xtps, mass);
1052 trxout = open_trx(trxfn, "w");
1053 /* Calculate the average structure in each cluster, *
1054 * all structures are fitted to the first struture of the cluster */
1055 snew(xav, natom);
1056 GMX_ASSERT(xav, "");
1059 if (transfn || ntransfn)
1061 ana_trans(clust, nf, transfn, ntransfn, log, rlo, rhi, oenv);
1064 if (clustidfn)
1066 FILE *fp = xvgropen(clustidfn, "Clusters", output_env_get_xvgr_tlabel(oenv), "Cluster #", oenv);
1067 if (output_env_get_print_xvgr_codes(oenv))
1069 fprintf(fp, "@ s0 symbol 2\n");
1070 fprintf(fp, "@ s0 symbol size 0.2\n");
1071 fprintf(fp, "@ s0 linestyle 0\n");
1073 for (i = 0; i < nf; i++)
1075 fprintf(fp, "%8g %8d\n", time[i], clust->cl[i]);
1077 xvgrclose(fp);
1079 if (sizefn)
1081 size_fp = xvgropen(sizefn, "Cluster Sizes", "Cluster #", "# Structures", oenv);
1082 if (output_env_get_print_xvgr_codes(oenv))
1084 fprintf(size_fp, "@g%d type %s\n", 0, "bar");
1087 if (clustndxfn && frameindices)
1089 ndxfn = gmx_ffopen(clustndxfn, "w");
1092 snew(structure, nf);
1093 fprintf(log, "\n%3s | %3s %4s | %6s %4s | cluster members\n",
1094 "cl.", "#st", "rmsd", "middle", "rmsd");
1095 for (cl = 1; cl <= clust->ncl; cl++)
1097 /* prepare structures (fit, middle, average) */
1098 if (xav)
1100 for (i = 0; i < natom; i++)
1102 clear_rvec(xav[i]);
1105 nstr = 0;
1106 for (i1 = 0; i1 < nf; i1++)
1108 if (clust->cl[i1] == cl)
1110 structure[nstr] = i1;
1111 nstr++;
1112 if (trxfn && (bAverage || write_ncl) )
1114 if (bFit)
1116 reset_x(ifsize, fitidx, natom, nullptr, xx[i1], mass);
1118 if (nstr == 1)
1120 first = i1;
1122 else if (bFit)
1124 do_fit(natom, mass, xx[first], xx[i1]);
1126 if (xav)
1128 for (i = 0; i < natom; i++)
1130 rvec_inc(xav[i], xx[i1][i]);
1136 if (sizefn)
1138 fprintf(size_fp, "%8d %8d\n", cl, nstr);
1140 if (ndxfn)
1142 fprintf(ndxfn, "[Cluster_%04d]\n", cl);
1144 clrmsd = 0;
1145 midstr = 0;
1146 midrmsd = 10000;
1147 for (i1 = 0; i1 < nstr; i1++)
1149 r = 0;
1150 if (nstr > 1)
1152 for (i = 0; i < nstr; i++)
1154 if (i < i1)
1156 r += rmsd[structure[i]][structure[i1]];
1158 else
1160 r += rmsd[structure[i1]][structure[i]];
1163 r /= (nstr - 1);
1165 if (r < midrmsd)
1167 midstr = structure[i1];
1168 midrmsd = r;
1170 clrmsd += r;
1172 clrmsd /= nstr;
1174 /* dump cluster info to logfile */
1175 if (nstr > 1)
1177 sprintf(buf1, "%6.3f", clrmsd);
1178 if (buf1[0] == '0')
1180 buf1[0] = ' ';
1182 sprintf(buf2, "%5.3f", midrmsd);
1183 if (buf2[0] == '0')
1185 buf2[0] = ' ';
1188 else
1190 sprintf(buf1, "%5s", "");
1191 sprintf(buf2, "%5s", "");
1193 fprintf(log, "%3d | %3d %s | %6g%s |", cl, nstr, buf1, time[midstr], buf2);
1194 for (i = 0; i < nstr; i++)
1196 if ((i % 7 == 0) && i)
1198 sprintf(buf, "\n%3s | %3s %4s | %6s %4s |", "", "", "", "", "");
1199 if (ndxfn)
1201 fprintf(ndxfn, "\n");
1204 else
1206 buf[0] = '\0';
1208 i1 = structure[i];
1209 fprintf(log, "%s %6g", buf, time[i1]);
1210 if (ndxfn)
1212 fprintf(ndxfn, " %6d", frameindices[i1] + 1);
1215 fprintf(log, "\n");
1216 if (ndxfn)
1218 fprintf(ndxfn, "\n");
1221 /* write structures to trajectory file(s) */
1222 if (trxfn)
1224 if (write_ncl)
1226 for (i = 0; i < nstr; i++)
1228 bWrite[i] = FALSE;
1231 if (cl < write_ncl+1 && nstr > write_nst)
1233 /* Dump all structures for this cluster */
1234 /* generate numbered filename (there is a %d in trxfn!) */
1235 sprintf(buf, trxsfn, cl);
1236 trxsout = open_trx(buf, "w");
1237 for (i = 0; i < nstr; i++)
1239 bWrite[i] = TRUE;
1240 if (rmsmin > 0.0)
1242 for (i1 = 0; i1 < i && bWrite[i]; i1++)
1244 if (bWrite[i1])
1246 bWrite[i] = rmsd[structure[i1]][structure[i]] > rmsmin;
1250 if (bWrite[i])
1252 write_trx(trxsout, iosize, outidx, atoms, i, time[structure[i]], boxes[structure[i]],
1253 xx[structure[i]], nullptr, nullptr);
1256 close_trx(trxsout);
1258 /* Dump the average structure for this cluster */
1259 if (bAverage)
1261 for (i = 0; i < natom; i++)
1263 svmul(1.0/nstr, xav[i], xav[i]);
1266 else
1268 for (i = 0; i < natom; i++)
1270 copy_rvec(xx[midstr][i], xav[i]);
1272 if (bFit)
1274 reset_x(ifsize, fitidx, natom, nullptr, xav, mass);
1277 if (bFit)
1279 do_fit(natom, mass, xtps, xav);
1281 write_trx(trxout, iosize, outidx, atoms, cl, time[midstr], boxes[midstr], xav, nullptr, nullptr);
1284 /* clean up */
1285 if (trxfn)
1287 close_trx(trxout);
1288 sfree(xav);
1289 if (write_ncl)
1291 sfree(bWrite);
1294 sfree(structure);
1295 if (trxsfn)
1297 sfree(trxsfn);
1300 if (size_fp)
1302 xvgrclose(size_fp);
1304 if (ndxfn)
1306 gmx_ffclose(ndxfn);
1310 static void convert_mat(t_matrix *mat, t_mat *rms)
1312 int i, j;
1314 rms->n1 = mat->nx;
1315 matrix2real(mat, rms->mat);
1316 /* free input xpm matrix data */
1317 for (i = 0; i < mat->nx; i++)
1319 sfree(mat->matrix[i]);
1321 sfree(mat->matrix);
1323 for (i = 0; i < mat->nx; i++)
1325 for (j = i; j < mat->nx; j++)
1327 rms->sumrms += rms->mat[i][j];
1328 rms->maxrms = std::max(rms->maxrms, rms->mat[i][j]);
1329 if (j != i)
1331 rms->minrms = std::min(rms->minrms, rms->mat[i][j]);
1335 rms->nn = mat->nx;
1338 int gmx_cluster(int argc, char *argv[])
1340 const char *desc[] = {
1341 "[THISMODULE] can cluster structures using several different methods.",
1342 "Distances between structures can be determined from a trajectory",
1343 "or read from an [REF].xpm[ref] matrix file with the [TT]-dm[tt] option.",
1344 "RMS deviation after fitting or RMS deviation of atom-pair distances",
1345 "can be used to define the distance between structures.[PAR]",
1347 "single linkage: add a structure to a cluster when its distance to any",
1348 "element of the cluster is less than [TT]cutoff[tt].[PAR]",
1350 "Jarvis Patrick: add a structure to a cluster when this structure",
1351 "and a structure in the cluster have each other as neighbors and",
1352 "they have a least [TT]P[tt] neighbors in common. The neighbors",
1353 "of a structure are the M closest structures or all structures within",
1354 "[TT]cutoff[tt].[PAR]",
1356 "Monte Carlo: reorder the RMSD matrix using Monte Carlo such that",
1357 "the order of the frames is using the smallest possible increments.",
1358 "With this it is possible to make a smooth animation going from one",
1359 "structure to another with the largest possible (e.g.) RMSD between",
1360 "them, however the intermediate steps should be as small as possible.",
1361 "Applications could be to visualize a potential of mean force",
1362 "ensemble of simulations or a pulling simulation. Obviously the user",
1363 "has to prepare the trajectory well (e.g. by not superimposing frames).",
1364 "The final result can be inspect visually by looking at the matrix",
1365 "[REF].xpm[ref] file, which should vary smoothly from bottom to top.[PAR]",
1367 "diagonalization: diagonalize the RMSD matrix.[PAR]",
1369 "gromos: use algorithm as described in Daura [IT]et al.[it]",
1370 "([IT]Angew. Chem. Int. Ed.[it] [BB]1999[bb], [IT]38[it], pp 236-240).",
1371 "Count number of neighbors using cut-off, take structure with",
1372 "largest number of neighbors with all its neighbors as cluster",
1373 "and eliminate it from the pool of clusters. Repeat for remaining",
1374 "structures in pool.[PAR]",
1376 "When the clustering algorithm assigns each structure to exactly one",
1377 "cluster (single linkage, Jarvis Patrick and gromos) and a trajectory",
1378 "file is supplied, the structure with",
1379 "the smallest average distance to the others or the average structure",
1380 "or all structures for each cluster will be written to a trajectory",
1381 "file. When writing all structures, separate numbered files are made",
1382 "for each cluster.[PAR]",
1384 "Two output files are always written:",
1386 " * [TT]-o[tt] writes the RMSD values in the upper left half of the matrix",
1387 " and a graphical depiction of the clusters in the lower right half",
1388 " When [TT]-minstruct[tt] = 1 the graphical depiction is black",
1389 " when two structures are in the same cluster.",
1390 " When [TT]-minstruct[tt] > 1 different colors will be used for each",
1391 " cluster.",
1392 " * [TT]-g[tt] writes information on the options used and a detailed list",
1393 " of all clusters and their members.",
1396 "Additionally, a number of optional output files can be written:",
1398 " * [TT]-dist[tt] writes the RMSD distribution.",
1399 " * [TT]-ev[tt] writes the eigenvectors of the RMSD matrix",
1400 " diagonalization.",
1401 " * [TT]-sz[tt] writes the cluster sizes.",
1402 " * [TT]-tr[tt] writes a matrix of the number transitions between",
1403 " cluster pairs.",
1404 " * [TT]-ntr[tt] writes the total number of transitions to or from",
1405 " each cluster.",
1406 " * [TT]-clid[tt] writes the cluster number as a function of time.",
1407 " * [TT]-clndx[tt] writes the frame numbers corresponding to the clusters to the",
1408 " specified index file to be read into trjconv.",
1409 " * [TT]-cl[tt] writes average (with option [TT]-av[tt]) or central",
1410 " structure of each cluster or writes numbered files with cluster members",
1411 " for a selected set of clusters (with option [TT]-wcl[tt], depends on",
1412 " [TT]-nst[tt] and [TT]-rmsmin[tt]). The center of a cluster is the",
1413 " structure with the smallest average RMSD from all other structures",
1414 " of the cluster.",
1417 FILE *fp, *log;
1418 int nf = 0, i, i1, i2, j;
1419 int64_t nrms = 0;
1421 matrix box;
1422 matrix *boxes = nullptr;
1423 rvec *xtps, *usextps, *x1, **xx = nullptr;
1424 const char *fn, *trx_out_fn;
1425 t_clusters clust;
1426 t_mat *rms, *orig = nullptr;
1427 real *eigenvalues;
1428 t_topology top;
1429 int ePBC;
1430 t_atoms useatoms;
1431 t_matrix *readmat = nullptr;
1432 real *eigenvectors;
1434 int isize = 0, ifsize = 0, iosize = 0;
1435 int *index = nullptr, *fitidx = nullptr, *outidx = nullptr, *frameindices = nullptr;
1436 char *grpname;
1437 real rmsd, **d1, **d2, *time = nullptr, time_invfac, *mass = nullptr;
1438 char buf[STRLEN], buf1[80];
1439 gmx_bool bAnalyze, bUseRmsdCut, bJP_RMSD = FALSE, bReadMat, bReadTraj, bPBC = TRUE;
1441 int method, ncluster = 0;
1442 static const char *methodname[] = {
1443 nullptr, "linkage", "jarvis-patrick", "monte-carlo",
1444 "diagonalization", "gromos", nullptr
1446 enum {
1447 m_null, m_linkage, m_jarvis_patrick,
1448 m_monte_carlo, m_diagonalize, m_gromos, m_nr
1450 /* Set colors for plotting: white = zero RMS, black = maximum */
1451 static t_rgb rlo_top = { 1.0, 1.0, 1.0 };
1452 static t_rgb rhi_top = { 0.0, 0.0, 0.0 };
1453 static t_rgb rlo_bot = { 1.0, 1.0, 1.0 };
1454 static t_rgb rhi_bot = { 0.0, 0.0, 1.0 };
1455 static int nlevels = 40, skip = 1;
1456 static real scalemax = -1.0, rmsdcut = 0.1, rmsmin = 0.0;
1457 gmx_bool bRMSdist = FALSE, bBinary = FALSE, bAverage = FALSE, bFit = TRUE;
1458 static int niter = 10000, nrandom = 0, seed = 0, write_ncl = 0, write_nst = 1, minstruct = 1;
1459 static real kT = 1e-3;
1460 static int M = 10, P = 3;
1461 gmx_output_env_t *oenv;
1462 gmx_rmpbc_t gpbc = nullptr;
1464 t_pargs pa[] = {
1465 { "-dista", FALSE, etBOOL, {&bRMSdist},
1466 "Use RMSD of distances instead of RMS deviation" },
1467 { "-nlevels", FALSE, etINT, {&nlevels},
1468 "Discretize RMSD matrix in this number of levels" },
1469 { "-cutoff", FALSE, etREAL, {&rmsdcut},
1470 "RMSD cut-off (nm) for two structures to be neighbor" },
1471 { "-fit", FALSE, etBOOL, {&bFit},
1472 "Use least squares fitting before RMSD calculation" },
1473 { "-max", FALSE, etREAL, {&scalemax},
1474 "Maximum level in RMSD matrix" },
1475 { "-skip", FALSE, etINT, {&skip},
1476 "Only analyze every nr-th frame" },
1477 { "-av", FALSE, etBOOL, {&bAverage},
1478 "Write average instead of middle structure for each cluster" },
1479 { "-wcl", FALSE, etINT, {&write_ncl},
1480 "Write the structures for this number of clusters to numbered files" },
1481 { "-nst", FALSE, etINT, {&write_nst},
1482 "Only write all structures if more than this number of structures per cluster" },
1483 { "-rmsmin", FALSE, etREAL, {&rmsmin},
1484 "minimum rms difference with rest of cluster for writing structures" },
1485 { "-method", FALSE, etENUM, {methodname},
1486 "Method for cluster determination" },
1487 { "-minstruct", FALSE, etINT, {&minstruct},
1488 "Minimum number of structures in cluster for coloring in the [REF].xpm[ref] file" },
1489 { "-binary", FALSE, etBOOL, {&bBinary},
1490 "Treat the RMSD matrix as consisting of 0 and 1, where the cut-off "
1491 "is given by [TT]-cutoff[tt]" },
1492 { "-M", FALSE, etINT, {&M},
1493 "Number of nearest neighbors considered for Jarvis-Patrick algorithm, "
1494 "0 is use cutoff" },
1495 { "-P", FALSE, etINT, {&P},
1496 "Number of identical nearest neighbors required to form a cluster" },
1497 { "-seed", FALSE, etINT, {&seed},
1498 "Random number seed for Monte Carlo clustering algorithm (0 means generate)" },
1499 { "-niter", FALSE, etINT, {&niter},
1500 "Number of iterations for MC" },
1501 { "-nrandom", FALSE, etINT, {&nrandom},
1502 "The first iterations for MC may be done complete random, to shuffle the frames" },
1503 { "-kT", FALSE, etREAL, {&kT},
1504 "Boltzmann weighting factor for Monte Carlo optimization "
1505 "(zero turns off uphill steps)" },
1506 { "-pbc", FALSE, etBOOL,
1507 { &bPBC }, "PBC check" }
1509 t_filenm fnm[] = {
1510 { efTRX, "-f", nullptr, ffOPTRD },
1511 { efTPS, "-s", nullptr, ffREAD },
1512 { efNDX, nullptr, nullptr, ffOPTRD },
1513 { efXPM, "-dm", "rmsd", ffOPTRD },
1514 { efXPM, "-om", "rmsd-raw", ffWRITE },
1515 { efXPM, "-o", "rmsd-clust", ffWRITE },
1516 { efLOG, "-g", "cluster", ffWRITE },
1517 { efXVG, "-dist", "rmsd-dist", ffOPTWR },
1518 { efXVG, "-ev", "rmsd-eig", ffOPTWR },
1519 { efXVG, "-conv", "mc-conv", ffOPTWR },
1520 { efXVG, "-sz", "clust-size", ffOPTWR},
1521 { efXPM, "-tr", "clust-trans", ffOPTWR},
1522 { efXVG, "-ntr", "clust-trans", ffOPTWR},
1523 { efXVG, "-clid", "clust-id", ffOPTWR},
1524 { efTRX, "-cl", "clusters.pdb", ffOPTWR },
1525 { efNDX, "-clndx", "clusters.ndx", ffOPTWR }
1527 #define NFILE asize(fnm)
1529 if (!parse_common_args(&argc, argv,
1530 PCA_CAN_VIEW | PCA_CAN_TIME | PCA_TIME_UNIT,
1531 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr,
1532 &oenv))
1534 return 0;
1537 /* parse options */
1538 bReadMat = opt2bSet("-dm", NFILE, fnm);
1539 bReadTraj = opt2bSet("-f", NFILE, fnm) || !bReadMat;
1540 if (opt2parg_bSet("-av", asize(pa), pa) ||
1541 opt2parg_bSet("-wcl", asize(pa), pa) ||
1542 opt2parg_bSet("-nst", asize(pa), pa) ||
1543 opt2parg_bSet("-rmsmin", asize(pa), pa) ||
1544 opt2bSet("-cl", NFILE, fnm) )
1546 trx_out_fn = opt2fn("-cl", NFILE, fnm);
1548 else
1550 trx_out_fn = nullptr;
1552 if (bReadMat && output_env_get_time_factor(oenv) != 1)
1554 fprintf(stderr,
1555 "\nWarning: assuming the time unit in %s is %s\n",
1556 opt2fn("-dm", NFILE, fnm), output_env_get_time_unit(oenv).c_str());
1558 if (trx_out_fn && !bReadTraj)
1560 fprintf(stderr, "\nWarning: "
1561 "cannot write cluster structures without reading trajectory\n"
1562 " ignoring option -cl %s\n", trx_out_fn);
1565 method = 1;
1566 while (method < m_nr && gmx_strcasecmp(methodname[0], methodname[method]) != 0)
1568 method++;
1570 if (method == m_nr)
1572 gmx_fatal(FARGS, "Invalid method");
1575 bAnalyze = (method == m_linkage || method == m_jarvis_patrick ||
1576 method == m_gromos );
1578 /* Open log file */
1579 log = ftp2FILE(efLOG, NFILE, fnm, "w");
1581 fprintf(stderr, "Using %s method for clustering\n", methodname[0]);
1582 fprintf(log, "Using %s method for clustering\n", methodname[0]);
1584 /* check input and write parameters to log file */
1585 bUseRmsdCut = FALSE;
1586 if (method == m_jarvis_patrick)
1588 bJP_RMSD = (M == 0) || opt2parg_bSet("-cutoff", asize(pa), pa);
1589 if ((M < 0) || (M == 1))
1591 gmx_fatal(FARGS, "M (%d) must be 0 or larger than 1", M);
1593 if (M < 2)
1595 sprintf(buf1, "Will use P=%d and RMSD cutoff (%g)", P, rmsdcut);
1596 bUseRmsdCut = TRUE;
1598 else
1600 if (P >= M)
1602 gmx_fatal(FARGS, "Number of neighbors required (P) must be less than M");
1604 if (bJP_RMSD)
1606 sprintf(buf1, "Will use P=%d, M=%d and RMSD cutoff (%g)", P, M, rmsdcut);
1607 bUseRmsdCut = TRUE;
1609 else
1611 sprintf(buf1, "Will use P=%d, M=%d", P, M);
1614 ffprintf_s(stderr, log, buf, "%s for determining the neighbors\n\n", buf1);
1616 else /* method != m_jarvis */
1618 bUseRmsdCut = ( bBinary || method == m_linkage || method == m_gromos );
1620 if (bUseRmsdCut && method != m_jarvis_patrick)
1622 fprintf(log, "Using RMSD cutoff %g nm\n", rmsdcut);
1624 if (method == m_monte_carlo)
1626 fprintf(log, "Using %d iterations\n", niter);
1629 if (skip < 1)
1631 gmx_fatal(FARGS, "skip (%d) should be >= 1", skip);
1634 /* get input */
1635 if (bReadTraj)
1637 /* don't read mass-database as masses (and top) are not used */
1638 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &ePBC, &xtps, nullptr, box,
1639 TRUE);
1640 if (bPBC)
1642 gpbc = gmx_rmpbc_init(&top.idef, ePBC, top.atoms.nr);
1645 fprintf(stderr, "\nSelect group for least squares fit%s:\n",
1646 bReadMat ? "" : " and RMSD calculation");
1647 get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),
1648 1, &ifsize, &fitidx, &grpname);
1649 if (trx_out_fn)
1651 fprintf(stderr, "\nSelect group for output:\n");
1652 get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),
1653 1, &iosize, &outidx, &grpname);
1654 /* merge and convert both index groups: */
1655 /* first copy outidx to index. let outidx refer to elements in index */
1656 snew(index, iosize);
1657 isize = iosize;
1658 for (i = 0; i < iosize; i++)
1660 index[i] = outidx[i];
1661 outidx[i] = i;
1663 /* now lookup elements from fitidx in index, add them if necessary
1664 and also let fitidx refer to elements in index */
1665 for (i = 0; i < ifsize; i++)
1667 j = 0;
1668 while (j < isize && index[j] != fitidx[i])
1670 j++;
1672 if (j >= isize)
1674 /* slow this way, but doesn't matter much */
1675 isize++;
1676 srenew(index, isize);
1678 index[j] = fitidx[i];
1679 fitidx[i] = j;
1682 else /* !trx_out_fn */
1684 isize = ifsize;
1685 snew(index, isize);
1686 for (i = 0; i < ifsize; i++)
1688 index[i] = fitidx[i];
1689 fitidx[i] = i;
1694 if (bReadTraj)
1696 /* Loop over first coordinate file */
1697 fn = opt2fn("-f", NFILE, fnm);
1699 xx = read_whole_trj(fn, isize, index, skip, &nf, &time, &boxes, &frameindices, oenv, bPBC, gpbc);
1700 output_env_conv_times(oenv, nf, time);
1701 if (!bRMSdist || bAnalyze)
1703 /* Center all frames on zero */
1704 snew(mass, isize);
1705 for (i = 0; i < ifsize; i++)
1707 mass[fitidx[i]] = top.atoms.atom[index[fitidx[i]]].m;
1709 if (bFit)
1711 for (i = 0; i < nf; i++)
1713 reset_x(ifsize, fitidx, isize, nullptr, xx[i], mass);
1717 if (bPBC)
1719 gmx_rmpbc_done(gpbc);
1723 if (bReadMat)
1725 fprintf(stderr, "Reading rms distance matrix ");
1726 read_xpm_matrix(opt2fn("-dm", NFILE, fnm), &readmat);
1727 fprintf(stderr, "\n");
1728 if (readmat[0].nx != readmat[0].ny)
1730 gmx_fatal(FARGS, "Matrix (%dx%d) is not square",
1731 readmat[0].nx, readmat[0].ny);
1733 if (bReadTraj && bAnalyze && (readmat[0].nx != nf))
1735 gmx_fatal(FARGS, "Matrix size (%dx%d) does not match the number of "
1736 "frames (%d)", readmat[0].nx, readmat[0].ny, nf);
1739 nf = readmat[0].nx;
1740 sfree(time);
1741 time = readmat[0].axis_x;
1742 time_invfac = output_env_get_time_invfactor(oenv);
1743 for (i = 0; i < nf; i++)
1745 time[i] *= time_invfac;
1748 rms = init_mat(readmat[0].nx, method == m_diagonalize);
1749 convert_mat(&(readmat[0]), rms);
1751 nlevels = readmat[0].nmap;
1753 else /* !bReadMat */
1755 rms = init_mat(nf, method == m_diagonalize);
1756 nrms = (static_cast<int64_t>(nf)*static_cast<int64_t>(nf-1))/2;
1757 if (!bRMSdist)
1759 fprintf(stderr, "Computing %dx%d RMS deviation matrix\n", nf, nf);
1760 /* Initialize work array */
1761 snew(x1, isize);
1762 for (i1 = 0; i1 < nf; i1++)
1764 for (i2 = i1+1; i2 < nf; i2++)
1766 for (i = 0; i < isize; i++)
1768 copy_rvec(xx[i1][i], x1[i]);
1770 if (bFit)
1772 do_fit(isize, mass, xx[i2], x1);
1774 rmsd = rmsdev(isize, mass, xx[i2], x1);
1775 set_mat_entry(rms, i1, i2, rmsd);
1777 nrms -= nf-i1-1;
1778 fprintf(stderr, "\r# RMSD calculations left: " "%" PRId64 " ", nrms);
1779 fflush(stderr);
1781 sfree(x1);
1783 else /* bRMSdist */
1785 fprintf(stderr, "Computing %dx%d RMS distance deviation matrix\n", nf, nf);
1787 /* Initiate work arrays */
1788 snew(d1, isize);
1789 snew(d2, isize);
1790 for (i = 0; (i < isize); i++)
1792 snew(d1[i], isize);
1793 snew(d2[i], isize);
1795 for (i1 = 0; i1 < nf; i1++)
1797 calc_dist(isize, xx[i1], d1);
1798 for (i2 = i1+1; (i2 < nf); i2++)
1800 calc_dist(isize, xx[i2], d2);
1801 set_mat_entry(rms, i1, i2, rms_dist(isize, d1, d2));
1803 nrms -= nf-i1-1;
1804 fprintf(stderr, "\r# RMSD calculations left: " "%" PRId64 " ", nrms);
1805 fflush(stderr);
1807 /* Clean up work arrays */
1808 for (i = 0; (i < isize); i++)
1810 sfree(d1[i]);
1811 sfree(d2[i]);
1813 sfree(d1);
1814 sfree(d2);
1816 fprintf(stderr, "\n\n");
1818 ffprintf_gg(stderr, log, buf, "The RMSD ranges from %g to %g nm\n",
1819 rms->minrms, rms->maxrms);
1820 ffprintf_g(stderr, log, buf, "Average RMSD is %g\n", 2*rms->sumrms/(nf*(nf-1)));
1821 ffprintf_d(stderr, log, buf, "Number of structures for matrix %d\n", nf);
1822 ffprintf_g(stderr, log, buf, "Energy of the matrix is %g.\n", mat_energy(rms));
1823 if (bUseRmsdCut && (rmsdcut < rms->minrms || rmsdcut > rms->maxrms) )
1825 fprintf(stderr, "WARNING: rmsd cutoff %g is outside range of rmsd values "
1826 "%g to %g\n", rmsdcut, rms->minrms, rms->maxrms);
1828 if (bAnalyze && (rmsmin < rms->minrms) )
1830 fprintf(stderr, "WARNING: rmsd minimum %g is below lowest rmsd value %g\n",
1831 rmsmin, rms->minrms);
1833 if (bAnalyze && (rmsmin > rmsdcut) )
1835 fprintf(stderr, "WARNING: rmsd minimum %g is above rmsd cutoff %g\n",
1836 rmsmin, rmsdcut);
1839 /* Plot the rmsd distribution */
1840 rmsd_distribution(opt2fn("-dist", NFILE, fnm), rms, oenv);
1842 if (bBinary)
1844 for (i1 = 0; (i1 < nf); i1++)
1846 for (i2 = 0; (i2 < nf); i2++)
1848 if (rms->mat[i1][i2] < rmsdcut)
1850 rms->mat[i1][i2] = 0;
1852 else
1854 rms->mat[i1][i2] = 1;
1860 snew(clust.cl, nf);
1861 switch (method)
1863 case m_linkage:
1864 /* Now sort the matrix and write it out again */
1865 gather(rms, rmsdcut, &clust);
1866 break;
1867 case m_diagonalize:
1868 /* Do a diagonalization */
1869 snew(eigenvalues, nf);
1870 snew(eigenvectors, nf*nf);
1871 std::memcpy(eigenvectors, rms->mat[0], nf*nf*sizeof(real));
1872 eigensolver(eigenvectors, nf, 0, nf, eigenvalues, rms->mat[0]);
1873 sfree(eigenvectors);
1875 fp = xvgropen(opt2fn("-ev", NFILE, fnm), "RMSD matrix Eigenvalues",
1876 "Eigenvector index", "Eigenvalues (nm\\S2\\N)", oenv);
1877 for (i = 0; (i < nf); i++)
1879 fprintf(fp, "%10d %10g\n", i, eigenvalues[i]);
1881 xvgrclose(fp);
1882 break;
1883 case m_monte_carlo:
1884 orig = init_mat(rms->nn, FALSE);
1885 orig->nn = rms->nn;
1886 copy_t_mat(orig, rms);
1887 mc_optimize(log, rms, time, niter, nrandom, seed, kT,
1888 opt2fn_null("-conv", NFILE, fnm), oenv);
1889 break;
1890 case m_jarvis_patrick:
1891 jarvis_patrick(rms->nn, rms->mat, M, P, bJP_RMSD ? rmsdcut : -1, &clust);
1892 break;
1893 case m_gromos:
1894 gromos(rms->nn, rms->mat, rmsdcut, &clust);
1895 break;
1896 default:
1897 gmx_fatal(FARGS, "DEATH HORROR unknown method \"%s\"", methodname[0]);
1900 if (method == m_monte_carlo || method == m_diagonalize)
1902 fprintf(stderr, "Energy of the matrix after clustering is %g.\n",
1903 mat_energy(rms));
1906 if (bAnalyze)
1908 if (minstruct > 1)
1910 ncluster = plot_clusters(nf, rms->mat, &clust, minstruct);
1912 else
1914 mark_clusters(nf, rms->mat, rms->maxrms, &clust);
1916 init_t_atoms(&useatoms, isize, FALSE);
1917 snew(usextps, isize);
1918 useatoms.resinfo = top.atoms.resinfo;
1919 for (i = 0; i < isize; i++)
1921 useatoms.atomname[i] = top.atoms.atomname[index[i]];
1922 useatoms.atom[i].resind = top.atoms.atom[index[i]].resind;
1923 useatoms.nres = std::max(useatoms.nres, useatoms.atom[i].resind+1);
1924 copy_rvec(xtps[index[i]], usextps[i]);
1926 useatoms.nr = isize;
1927 analyze_clusters(nf, &clust, rms->mat, isize, &useatoms, usextps, mass, xx, time, boxes, frameindices,
1928 ifsize, fitidx, iosize, outidx,
1929 bReadTraj ? trx_out_fn : nullptr,
1930 opt2fn_null("-sz", NFILE, fnm),
1931 opt2fn_null("-tr", NFILE, fnm),
1932 opt2fn_null("-ntr", NFILE, fnm),
1933 opt2fn_null("-clid", NFILE, fnm),
1934 opt2fn_null("-clndx", NFILE, fnm),
1935 bAverage, write_ncl, write_nst, rmsmin, bFit, log,
1936 rlo_bot, rhi_bot, oenv);
1937 sfree(boxes);
1938 sfree(frameindices);
1940 gmx_ffclose(log);
1942 if (bBinary && !bAnalyze)
1944 /* Make the clustering visible */
1945 for (i2 = 0; (i2 < nf); i2++)
1947 for (i1 = i2+1; (i1 < nf); i1++)
1949 if (rms->mat[i1][i2] != 0.0f)
1951 rms->mat[i1][i2] = rms->maxrms;
1957 fp = opt2FILE("-o", NFILE, fnm, "w");
1958 fprintf(stderr, "Writing rms distance/clustering matrix ");
1959 if (bReadMat)
1961 write_xpm(fp, 0, readmat[0].title, readmat[0].legend, readmat[0].label_x,
1962 readmat[0].label_y, nf, nf, readmat[0].axis_x, readmat[0].axis_y,
1963 rms->mat, 0.0, rms->maxrms, rlo_top, rhi_top, &nlevels);
1965 else
1967 auto timeLabel = output_env_get_time_label(oenv);
1968 auto title = gmx::formatString("RMS%sDeviation / Cluster Index",
1969 bRMSdist ? " Distance " : " ");
1970 if (minstruct > 1)
1972 write_xpm_split(fp, 0, title, "RMSD (nm)", timeLabel, timeLabel,
1973 nf, nf, time, time, rms->mat, 0.0, rms->maxrms, &nlevels,
1974 rlo_top, rhi_top, 0.0, ncluster,
1975 &ncluster, TRUE, rlo_bot, rhi_bot);
1977 else
1979 write_xpm(fp, 0, title, "RMSD (nm)", timeLabel, timeLabel,
1980 nf, nf, time, time, rms->mat, 0.0, rms->maxrms,
1981 rlo_top, rhi_top, &nlevels);
1984 fprintf(stderr, "\n");
1985 gmx_ffclose(fp);
1986 if (nullptr != orig)
1988 fp = opt2FILE("-om", NFILE, fnm, "w");
1989 auto timeLabel = output_env_get_time_label(oenv);
1990 auto title = gmx::formatString("RMS%sDeviation", bRMSdist ? " Distance " : " ");
1991 write_xpm(fp, 0, title, "RMSD (nm)", timeLabel, timeLabel,
1992 nf, nf, time, time, orig->mat, 0.0, orig->maxrms,
1993 rlo_top, rhi_top, &nlevels);
1994 gmx_ffclose(fp);
1995 done_mat(&orig);
1996 sfree(orig);
1998 /* now show what we've done */
1999 do_view(oenv, opt2fn("-o", NFILE, fnm), "-nxy");
2000 do_view(oenv, opt2fn_null("-sz", NFILE, fnm), "-nxy");
2001 if (method == m_diagonalize)
2003 do_view(oenv, opt2fn_null("-ev", NFILE, fnm), "-nxy");
2005 do_view(oenv, opt2fn("-dist", NFILE, fnm), "-nxy");
2006 if (bAnalyze)
2008 do_view(oenv, opt2fn_null("-tr", NFILE, fnm), "-nxy");
2009 do_view(oenv, opt2fn_null("-ntr", NFILE, fnm), "-nxy");
2010 do_view(oenv, opt2fn_null("-clid", NFILE, fnm), "-nxy");
2012 do_view(oenv, opt2fn_null("-conv", NFILE, fnm), nullptr);
2014 return 0;