Fix -V
[clav.git] / mutation-finder.c
blobe14e43093097eb9e66efe062252a0d23a08bde19
1 /*
2 * Copyright (c) 2018, S. Gilles <sgilles@math.umd.edu>
4 * Permission to use, copy, modify, and/or distribute this software
5 * for any purpose with or without fee is hereby granted, provided
6 * that the above copyright notice and this permission notice appear
7 * in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS *ALL
10 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING AL*L IMPLIED
11 * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EV*ENT SHALL THE
12 * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
13 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
14 * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
15 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
16 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 #include <errno.h>
19 #include <limits.h>
20 #include <locale.h>
21 #include <signal.h>
22 #include <stdint.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <unistd.h>
28 #include "macros.h"
29 #include "quiver.h"
31 /* Flag for whether to print info */
32 static volatile sig_atomic_t status_requested = 0;
34 /* Signal handler */
35 static void
36 handle(int sig);
38 /* Signal handler */
39 static void handle(int sig)
41 /* XXX: I'd like to check for SIGINFO here */
42 if (sig == SIGUSR1) {
43 status_requested = 1;
44 signal(sig, handle);
48 /* Print usage */
49 static void usage(char *argv0)
51 printf("Usage: %s -s <startfile> -e <endfile>\n", argv0);
52 printf(" [ -v ] # Be verbose\n");
53 printf(" [ -R ] # Require all non-frozen in sequence\n");
54 printf(" [ -V ] # Test via valency heuristic, not exactness\n");
55 printf(" [ -c <edgecap> ]\n");
56 printf(" [ -f <frozen> [ -f <frozen> [...] ] ]\n");
57 printf(" [ -l <startlen> ] [ -m <maxlen> ]\n");
58 printf("\n");
59 printf(" startfile: a quiver file representing the start "
60 "state\n");
61 printf(" endfile: a quiver file representing the desired end "
62 "state\n");
63 printf(" edgecap: ignore mutations if the cause edges with"
64 "weights above this\n");
65 printf(" startlen: start with sequences of this length\n");
66 printf(" maxlen: halt after exhausting sequences of this "
67 "length\n");
68 printf(" frozen: a vertex with this name will not be "
69 "mutated\n");
72 static int cmp(const void *a, const void *b)
74 struct vertex **va = (struct vertex **) a;
75 struct vertex **vb = (struct vertex **) b;
77 return strcmp((*va)->name, (*vb)->name);
80 /* Dumb valency-based checksum for graphs. */
81 static uintmax_t valency_checksum(struct quiver const * const q)
83 uintmax_t c_a = 0;
84 uintmax_t c_b = 0;
85 uintmax_t c_c = 0;
86 uintmax_t c_d = 0;
87 const size_t N = sizeof (uintmax_t) * sizeof (char) / 4;
88 struct rational r = { 0 };
91 * This looks really, really stupid, but there is a bit of
92 * thought put into it. Most graphs on which this will run
93 * have, say, <30 vertices, so each sum will probably fit
94 * in quarter of a uintmax_t easily. That makes the or-ing
95 * slightly relevant. Also, the whole point of this heuristic
96 * is that we don't know how the graph could be shuffled,
97 * so dependence on indices is frowned upon, which leaves
98 * most serialization-based checksums out.
100 for (size_t j = 0; j < q->v_num; ++j) {
101 for (size_t k = 0; k < q->v_num; ++k) {
102 quiver_get_sigma(q, j, k, &r);
103 c_a = c_a + r.p;
104 c_b = c_b + r.p * r.p;
105 c_c = c_c + r.p * r.q;
106 c_d = c_d + r.q * r.q;
110 return (c_a << (3 * N)) | (c_b << (2 * N)) | (c_c << N) | c_d;
113 /* Load start/end quivers, figure out frozen vertices, order things, etc. */
114 static int setup(char *start_file, char *end_file, char **frozen, size_t
115 num_frozen, struct quiver *out_qstart, struct quiver *out_qend,
116 size_t *out_num_nonfrozen)
118 struct quiver a = { 0 };
119 struct quiver b = { 0 };
120 FILE *fa = 0;
121 FILE *fb = 0;
122 const char *errstr = 0;
123 int ret = 0;
124 struct vertex **averts = 0;
125 struct vertex **bverts = 0;
126 size_t *a_to_start = 0;
127 size_t *b_to_end = 0;
129 if (!out_qstart ||
130 !out_qend) {
131 fprintf(stderr, L("invalid quivers"));
132 ret = EINVAL;
133 goto done;
136 if (out_qstart->v_num ||
137 out_qend->v_num) {
138 fprintf(stderr, L("output quivers are not empty\n"));
139 ret = EINVAL;
140 goto done;
143 if (!(fa = fopen(start_file, "r"))) {
144 ret = errno;
145 perror(L("fopen"));
146 fprintf(stderr, "cannot open file %s\n", start_file);
147 goto done;
150 if (!(fb = fopen(end_file, "r"))) {
151 ret = errno;
152 perror(L("fopen"));
153 fprintf(stderr, "cannot open file %s\n", end_file);
154 goto done;
157 if ((ret = quiver_load_from_file(&a, fa, &errstr))) {
158 fprintf(stderr, "%s\n", errstr);
159 goto done;
162 if ((ret = quiver_load_from_file(&b, fb, &errstr))) {
163 fprintf(stderr, "%s\n", errstr);
164 goto done;
167 if (a.v_num != b.v_num) {
168 fprintf(stderr, L(
169 "Quivers do not have same number of vertices\n"));
170 ret = EINVAL;
171 goto done;
175 * Make sure there are no duplicate vertices, but don't sort in
176 * place - that would change ordering, and ordering of vertices
177 * must match user input - tuning input so that known prefixes
178 * arise early is sometimes crucial.
180 if (!(averts = calloc(a.v_num, sizeof *averts))) {
181 ret = errno;
182 perror(L("calloc"));
183 goto done;
186 if (!(bverts = calloc(b.v_num, sizeof *bverts))) {
187 ret = errno;
188 perror(L("calloc"));
189 goto done;
192 for (size_t j = 0; j < a.v_num; ++j) {
193 averts[j] = &(a.v[j]);
194 bverts[j] = &(b.v[j]);
197 qsort(averts, a.v_num, sizeof(averts[0]), cmp);
198 qsort(bverts, b.v_num, sizeof(bverts[0]), cmp);
200 for (size_t j = 0; j < a.v_num; ++j) {
201 if (j &&
202 !strcmp(averts[j]->name, averts[j - 1]->name)) {
203 fprintf(stderr, L("Duplicate vertex \"%s\"\n"),
204 averts[j]->name);
205 ret = EINVAL;
206 goto done;
209 int d = strcmp(averts[j]->name, bverts[j]->name);
211 if (d > 0) {
212 fprintf(stderr, L("Vertex \"%s\" not in both files\n"),
213 bverts[j]->name);
214 ret = EINVAL;
215 goto done;
218 if (d < 0) {
219 fprintf(stderr, L("Vertex \"%s\" not in both files\n"),
220 averts[j]->name);
221 ret = EINVAL;
222 goto done;
225 if (averts[j]->fatness != bverts[j]->fatness) {
226 fprintf(stderr, L(
227 "Vertex \"%s\" has inconsistent fatness\n"),
228 averts[j]->name);
229 ret = EINVAL;
230 goto done;
234 /* a_to_s[i] = j <=> "vertex i in a is vertex j in out_qstart" */
235 if (!(a_to_start = calloc(a.v_num, sizeof(*a_to_start)))) {
236 ret = errno;
237 perror(L("calloc"));
238 goto done;
241 if (!(b_to_end = calloc(b.v_num, sizeof(*b_to_end)))) {
242 ret = errno;
243 perror(L("calloc"));
244 goto done;
248 * Order so that frozen vertices are all at the end - required
249 * for incrementation algorithm
251 for (uint_fast8_t want_frozen = 0; want_frozen <= 1; want_frozen++) {
252 for (size_t j = 0; j < a.v_num; ++j) {
253 uint_fast8_t is_frozen = 0;
255 for (size_t k = 0; k < num_frozen; ++k) {
256 is_frozen = is_frozen ||
257 !strcmp(a.v[j].name, frozen[k]);
259 if (is_frozen) {
260 k = num_frozen;
264 if (is_frozen != want_frozen) {
265 continue;
268 if ((ret = quiver_add_vertex(out_qstart,
269 &(a_to_start[j]),
270 a.v[j].name,
271 a.v[j].fatness, 0, 0, 0,
272 &errstr))) {
273 fprintf(stderr, "%s\n", errstr);
274 goto done;
277 if (!want_frozen &&
278 out_num_nonfrozen) {
279 *out_num_nonfrozen = a_to_start[j] + 1;
282 size_t tj = 0;
284 if ((ret = quiver_add_vertex(out_qend, &tj, a.v[j].name,
285 a.v[j].fatness, 0, 0, 0,
286 &errstr))) {
287 fprintf(stderr, "%s\n", errstr);
288 goto done;
291 for (size_t k = 0; k < b.v_num; ++k) {
292 if (!(strcmp(b.v[k].name,
293 out_qend->v[tj].name))) {
294 b_to_end[k] = tj;
300 /* Add the edges in correct order */
301 for (size_t j = 0; j < a.v_num; ++j) {
302 size_t tj = a_to_start[j];
304 for (size_t k = 0; k < a.v_num; ++k) {
305 size_t tk = a_to_start[k];
306 size_t idx_o = tj * out_qstart->v_len + tk;
307 size_t idx_a = j * a.v_len + k;
309 out_qstart->e[idx_o] = a.e[idx_a];
313 for (size_t j = 0; j < b.v_num; ++j) {
314 size_t tj = b_to_end[j];
316 for (size_t k = 0; k < b.v_num; ++k) {
317 size_t tk = b_to_end[k];
318 size_t idx_o = tj * out_qend->v_len + tk;
319 size_t idx_b = j * b.v_len + k;
321 out_qend->e[idx_o] = b.e[idx_b];
325 done:
326 quiver_clean(&a);
327 quiver_clean(&b);
329 if (fa) {
330 fclose(fa);
333 if (fb) {
334 fclose(fb);
337 free(averts);
338 free(bverts);
339 free(a_to_start);
340 free(b_to_end);
342 return ret;
345 /* Main loop */
346 int main(int argc, char **argv)
348 int ret = 0;
349 struct quiver q = { 0 };
350 struct quiver qend = { 0 };
351 char *frozen[argc];
352 size_t fidx = 0;
353 char *start_file = 0;
354 char *end_file = 0;
355 long start_len_l = 0;
356 long max_len_l = 0;
357 size_t start_len = 0;
358 size_t max_len = (size_t) -1;
359 size_t warn_level = (size_t) -1;
360 struct rational *target = 0;
361 char *p = 0;
362 const char *errstr = 0;
363 uint_fast8_t verbose = 0;
364 uint_fast8_t require_all_nonfrozen = 0;
365 uint_fast8_t check_valency_only = 0;
366 int *seen_in_seq = 0;
367 uintmax_t desired_valency_checksum = 0;
368 uintmax_t this_valency_checksum = 0;
370 setlocale(LC_ALL, "");
371 int opt = 0;
373 for (int j = 0; j < argc; ++j) {
374 frozen[j] = 0;
377 while ((opt = getopt(argc, argv, "vhRVs:e:c:m:l:f:")) != -1) {
378 switch (opt) {
379 case 'v':
380 verbose++;
381 break;
382 case 'h':
383 usage(argv[0]);
384 ret = 0;
385 goto done;
386 case 'R':
387 require_all_nonfrozen = 1;
388 break;
389 case 'V':
390 check_valency_only = 1;
391 break;
392 case 's':
393 start_file = optarg;
394 break;
395 case 'e':
396 end_file = optarg;
397 break;
398 case 'c':
399 warn_level = strtoll(optarg, &p, 0);
401 if (p &&
402 *p) {
403 fprintf(stderr, "Invalid edge cap: %s\n",
404 optarg);
405 ret = EINVAL;
406 goto done;
409 break;
410 case 'l':
411 start_len_l = strtoll(optarg, &p, 0);
413 if ((start_len_l <= 0) ||
414 (p &&
415 *p)) {
416 fprintf(stderr, "Invalid start length: %s\n",
417 optarg);
418 ret = EINVAL;
419 goto done;
422 start_len = start_len_l;
423 break;
424 case 'm':
425 max_len_l = strtoll(optarg, &p, 0);
427 if ((max_len_l <= 0) ||
428 (p &&
429 *p)) {
430 fprintf(stderr, "Invalid max length: %s\n",
431 optarg);
432 ret = EINVAL;
433 goto done;
436 max_len = max_len_l;
437 break;
438 case 'f':
439 frozen[fidx] = optarg;
440 fidx++;
441 break;
442 default:
443 usage(argv[0]);
444 ret = EINVAL;
445 goto done;
449 if (!start_file ||
450 !end_file) {
451 usage(argv[0]);
452 ret = EINVAL;
453 goto done;
456 size_t num_nonfrozen = 0;
458 if (setup(start_file, end_file, frozen, fidx, &q, &qend,
459 &num_nonfrozen)) {
460 goto done;
463 if (warn_level == (size_t) -1) {
464 warn_level = 0;
466 for (size_t k = 0; k < q.v_num; ++k) {
467 if (warn_level < q.v[k].fatness) {
468 warn_level = q.v[k].fatness;
472 warn_level = warn_level + 1;
475 if (warn_level >= UINT_MAX) {
476 warn_level = UINT_MAX - 1;
479 if ((ret = quiver_set_warning_threshold(&q, warn_level, &errstr))) {
480 fprintf(stderr, "%s\n", errstr);
481 goto done;
484 /* Save off qend's vertices. XXX: Why not just USE qend? */
485 if ((qend.v_num * qend.v_num) / qend.v_num != qend.v_num) {
486 /* Should never happen - isn't qend.v_num capped? */
487 ret = errno = EOVERFLOW;
488 perror(L(""));
489 goto done;
492 if (!(target = calloc(qend.v_num * qend.v_num, sizeof *target))) {
493 ret = errno;
494 perror(L("calloc"));
495 goto done;
498 if (!(seen_in_seq = calloc(qend.v_num, sizeof *seen_in_seq))) {
499 ret = errno;
500 perror(L("calloc"));
501 goto done;
504 for (size_t i = 0; i < qend.v_num * qend.v_num; ++i) {
505 target[i] = (struct rational) { .p = 0, .q = 1 };
508 for (size_t i = 0; i < qend.v_num; ++i) {
509 for (size_t j = 0; j < qend.v_num; ++j) {
510 target[i * qend.v_num + j] = qend.e[i * qend.v_len + j];
514 if (check_valency_only) {
515 desired_valency_checksum = valency_checksum(&qend);
518 /* Now, the long haul*/
519 size_t len = 0;
521 if (start_len) {
522 len = start_len - 1;
525 signal(SIGUSR1, handle);
526 size_t s = 0;
527 size_t c_idx;
528 int warn = 0;
529 uint_fast8_t correct = 0;
531 while (len < max_len) {
532 len++;
533 size_t sequence[len];
535 for (size_t k = 0; k < num_nonfrozen; ++k) {
536 seen_in_seq[k] = 0;
539 for (size_t k = 0; k < len; ++k) {
540 /* This will get pruned, it's okay */
541 sequence[k] = 0;
542 seen_in_seq[sequence[k]]++;
545 c_idx = 0;
546 have_full_sequence:
548 if (status_requested) {
549 status_requested = 0;
550 printf("Current sequence %s", q.v[sequence[0]].name);
552 for (size_t k = 1; k < len; ++k) {
553 printf(", %s", q.v[sequence[k]].name);
556 printf("\n");
557 fflush(stdout);
561 * Here, we have computed a full sequence that we want
562 * to check. c_idx is in [0, len - 1], and the quiver
563 * has been mutated following the sequence up to (but
564 * not including) c_idx.
566 * This code is duplicated from increment_sequence
567 * because it is reasonable to start the whole process
568 * with a sequence that may, in fact, produce warnings.
570 warn = 0;
573 * If we are looking for a flip, we're probably
574 * going to require that all non-frozen vertices
575 * participate in the mutation.
577 if (require_all_nonfrozen &&
578 c_idx == len - 1) {
579 for (size_t j = 0; j < num_nonfrozen; ++j) {
580 if (!seen_in_seq[j]) {
581 goto increment_sequence;
586 while (c_idx < len) {
587 /* Prune duplicates (mutations are involutions) */
588 if (c_idx &&
589 sequence[c_idx] == sequence[c_idx - 1]) {
590 goto increment_sequence;
593 /* Proceed one more mutation */
594 quiver_mutate(&q, sequence[c_idx], &warn, 0);
596 if (warn) {
597 /* That mutation caused a warning: skip it */
598 quiver_mutate(&q, sequence[c_idx], 0, 0);
600 /* And try the sequence, starting at c_idx */
601 goto increment_sequence;
605 * Skip ... X, Y ... when X > Y and the edge
606 * from X to Y is trivial
608 if (c_idx &&
609 sequence[c_idx] < sequence[c_idx - 1]) {
610 size_t x = sequence[c_idx - 1];
611 size_t y = sequence[c_idx];
612 struct rational *xy = &(q.e[x * q.v_len + y]);
613 struct rational *yx = &(q.e[y * q.v_len + x]);
615 if (!xy->p &&
616 !yx->p) {
617 /* Undo mutation */
618 quiver_mutate(&q, sequence[c_idx], 0,
621 /* And try the sequence, starting at c_idx */
622 goto increment_sequence;
626 c_idx++;
630 * Here, there's a full sequence, and the quiver has
631 * been mutated according to it (all of it). We want
632 * to check if the result matches the target.
634 if (check_valency_only) {
635 /* Take a short-cut */
636 this_valency_checksum = valency_checksum(&q);
638 if (this_valency_checksum != desired_valency_checksum) {
639 goto finished_check;
643 correct = 1;
645 for (size_t i = 0; i < q.v_num; ++i) {
646 for (size_t j = 0; j < q.v_num; ++j) {
647 struct rational *e = &(q.e[i * q.v_len + j]);
648 struct rational *f = &(target[i * q.v_num + j]);
650 if (e->p * f->q != f->p * e->q) {
651 correct = 0;
652 i = q.v_num + 1;
653 j = i;
658 if (correct ||
659 check_valency_only) {
660 printf("SEQUENCE: %s", q.v[sequence[0]].name);
662 for (size_t k = 1; k < len; ++k) {
663 printf(", %s", q.v[sequence[k]].name);
666 printf("\n");
667 fflush(stdout);
670 finished_check:
671 c_idx = len - 1;
672 quiver_mutate(&q, sequence[c_idx], 0, 0);
673 increment_sequence:
676 * Here, we have some kind of sequence. The quiver agrees
677 * with it up to (but not including) c_idx, and we want
678 * to increment it at c_idx.
680 s++;
681 seen_in_seq[sequence[c_idx]]--;
682 sequence[c_idx]++;
683 seen_in_seq[sequence[c_idx]]++;
685 if (verbose &&
686 c_idx == 2) {
687 double completed = sequence[0] * num_nonfrozen *
688 num_nonfrozen + sequence[1] *
689 num_nonfrozen +
690 sequence[2] - 1;
691 double total = num_nonfrozen * num_nonfrozen *
692 num_nonfrozen;
694 printf("Length %zu: completed %.3g%%\n", len, (100.0 *
695 completed)
696 / total);
697 fflush(stdout);
700 /* Skip over repeats: each mutation is an involution */
701 if (c_idx > 0 &&
702 sequence[c_idx] == sequence[c_idx - 1]) {
703 seen_in_seq[sequence[c_idx]]--;
704 sequence[c_idx]++;
705 seen_in_seq[sequence[c_idx]]++;
708 /* Wrap back to 0 */
709 if (sequence[c_idx] >= num_nonfrozen) {
710 if (!c_idx) {
711 goto exhausted_len;
714 c_idx--;
715 quiver_mutate(&q, sequence[c_idx], 0, 0);
716 goto increment_sequence;
719 if (c_idx != len - 1) {
721 * Here, we now have a start of a sequence.
722 * The quiver agrees with our start up to (but
723 * not including) c_idx, and we want to possibly
724 * complete this to a full sequence. First,
725 * however, we need to make sure that we don't
726 * introduce any warnings at c_idx.
728 * This single-mutation testing block could be
729 * ignored, but I have a vague suspicion it will
730 * be a hot path.
732 warn = 0;
733 quiver_mutate(&q, sequence[c_idx], &warn, 0);
735 if (warn) {
736 /* That mutation caused a warning: skip it */
737 quiver_mutate(&q, sequence[c_idx], 0, 0);
739 /* And try the sequence, starting at c_idx */
740 goto increment_sequence;
744 * Quivers almost commute: in particular if
745 * the edge v_i -> v_j is zero, then mutation
746 * at v_i and v_j commute. So if we're at X, Y
747 * in the list, and X > Y, then we've actually
748 * already computed that branch of the tree,
749 * back when it was Y, X. Therefore, increment X.
751 if (c_idx &&
752 sequence[c_idx] < sequence[c_idx - 1]) {
753 size_t x = sequence[c_idx - 1];
754 size_t y = sequence[c_idx];
755 struct rational *xy = &(q.e[x * q.v_len + y]);
756 struct rational *yx = &(q.e[y * q.v_len + x]);
758 if (!xy->p &&
759 !yx->p) {
760 /* Undo mutation */
761 quiver_mutate(&q, sequence[c_idx], 0,
764 /* And try the sequence, starting at c_idx */
765 goto increment_sequence;
770 * Here the quiver agrees with our start up
771 * to and including c_idx, and we need to fill
772 * it out into a full sequence so that it can
773 * be tested. We don't need to perform all the
774 * mutations of that full sequence, but we do
775 * need to generate the minimal one.
777 for (size_t i = c_idx + 1; i < len; ++i) {
778 seen_in_seq[sequence[i]]--;
779 sequence[i] = 1 - !!((i - c_idx) % 2);
780 seen_in_seq[sequence[i]]++;
783 c_idx++;
786 goto have_full_sequence;
787 exhausted_len:
790 * Here, we have c_idx = 0, and the quiver has not been
791 * mutated by anything in sequence
793 printf("Exhausted length %zu\n", len);
796 fflush(stdout);
797 done:
798 free(target);
799 free(seen_in_seq);
801 return ret;