maint: add doc/coverage to .gitignore
[coreutils.git] / src / tsort.c
blobcb84393e9c312f8199d72d21c23eb489529efd16
1 /* tsort - topological sort.
2 Copyright (C) 1998-2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Mark Kettenis <kettenis@phys.uva.nl>. */
19 /* The topological sort is done according to Algorithm T (Topological
20 sort) in Donald E. Knuth, The Art of Computer Programming, Volume
21 1/Fundamental Algorithms, page 262. */
23 #include <config.h>
25 #include <assert.h>
26 #include <getopt.h>
27 #include <sys/types.h>
29 #include "system.h"
30 #include "long-options.h"
31 #include "die.h"
32 #include "error.h"
33 #include "fadvise.h"
34 #include "readtokens.h"
35 #include "stdio--.h"
36 #include "quote.h"
38 /* The official name of this program (e.g., no 'g' prefix). */
39 #define PROGRAM_NAME "tsort"
41 #define AUTHORS proper_name ("Mark Kettenis")
43 static struct option const long_options[] =
45 {NULL, 0, NULL, 0}
48 /* Token delimiters when reading from a file. */
49 #define DELIM " \t\n"
51 /* Members of the list of successors. */
52 struct successor
54 struct item *suc;
55 struct successor *next;
58 /* Each string is held in core as the head of a list of successors. */
59 struct item
61 const char *str;
62 struct item *left, *right;
63 int balance; /* -1, 0, or +1 */
64 size_t count;
65 struct item *qlink;
66 struct successor *top;
69 /* The head of the sorted list. */
70 static struct item *head = NULL;
72 /* The tail of the list of 'zeros', strings that have no predecessors. */
73 static struct item *zeros = NULL;
75 /* Used for loop detection. */
76 static struct item *loop = NULL;
78 /* The number of strings to sort. */
79 static size_t n_strings = 0;
81 void
82 usage (int status)
84 if (status != EXIT_SUCCESS)
85 emit_try_help ();
86 else
88 printf (_("\
89 Usage: %s [OPTION] [FILE]\n\
90 Write totally ordered list consistent with the partial ordering in FILE.\n\
91 "), program_name);
93 emit_stdin_note ();
95 fputs (_("\
96 \n\
97 "), stdout);
98 fputs (HELP_OPTION_DESCRIPTION, stdout);
99 fputs (VERSION_OPTION_DESCRIPTION, stdout);
100 emit_ancillary_info (PROGRAM_NAME);
103 exit (status);
106 /* Create a new item/node for STR. */
107 static struct item *
108 new_item (const char *str)
110 struct item *k = xmalloc (sizeof *k);
112 k->str = (str ? xstrdup (str): NULL);
113 k->left = k->right = NULL;
114 k->balance = 0;
116 /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^). */
117 k->count = 0;
118 k->qlink = NULL;
119 k->top = NULL;
121 return k;
124 /* Search binary tree rooted at *ROOT for STR. Allocate a new tree if
125 *ROOT is NULL. Insert a node/item for STR if not found. Return
126 the node/item found/created for STR.
128 This is done according to Algorithm A (Balanced tree search and
129 insertion) in Donald E. Knuth, The Art of Computer Programming,
130 Volume 3/Searching and Sorting, pages 455--457. */
132 static struct item *
133 search_item (struct item *root, const char *str)
135 struct item *p, *q, *r, *s, *t;
136 int a;
138 assert (root);
140 /* Make sure the tree is not empty, since that is what the algorithm
141 below expects. */
142 if (root->right == NULL)
143 return (root->right = new_item (str));
145 /* A1. Initialize. */
146 t = root;
147 s = p = root->right;
149 while (true)
151 /* A2. Compare. */
152 a = strcmp (str, p->str);
153 if (a == 0)
154 return p;
156 /* A3 & A4. Move left & right. */
157 if (a < 0)
158 q = p->left;
159 else
160 q = p->right;
162 if (q == NULL)
164 /* A5. Insert. */
165 q = new_item (str);
167 /* A3 & A4. (continued). */
168 if (a < 0)
169 p->left = q;
170 else
171 p->right = q;
173 /* A6. Adjust balance factors. */
174 assert (!STREQ (str, s->str));
175 if (strcmp (str, s->str) < 0)
177 r = p = s->left;
178 a = -1;
180 else
182 r = p = s->right;
183 a = 1;
186 while (p != q)
188 assert (!STREQ (str, p->str));
189 if (strcmp (str, p->str) < 0)
191 p->balance = -1;
192 p = p->left;
194 else
196 p->balance = 1;
197 p = p->right;
201 /* A7. Balancing act. */
202 if (s->balance == 0 || s->balance == -a)
204 s->balance += a;
205 return q;
208 if (r->balance == a)
210 /* A8. Single Rotation. */
211 p = r;
212 if (a < 0)
214 s->left = r->right;
215 r->right = s;
217 else
219 s->right = r->left;
220 r->left = s;
222 s->balance = r->balance = 0;
224 else
226 /* A9. Double rotation. */
227 if (a < 0)
229 p = r->right;
230 r->right = p->left;
231 p->left = r;
232 s->left = p->right;
233 p->right = s;
235 else
237 p = r->left;
238 r->left = p->right;
239 p->right = r;
240 s->right = p->left;
241 p->left = s;
244 s->balance = 0;
245 r->balance = 0;
246 if (p->balance == a)
247 s->balance = -a;
248 else if (p->balance == -a)
249 r->balance = a;
250 p->balance = 0;
253 /* A10. Finishing touch. */
254 if (s == t->right)
255 t->right = p;
256 else
257 t->left = p;
259 return q;
262 /* A3 & A4. (continued). */
263 if (q->balance)
265 t = p;
266 s = q;
269 p = q;
272 /* NOTREACHED */
275 /* Record the fact that J precedes K. */
277 static void
278 record_relation (struct item *j, struct item *k)
280 struct successor *p;
282 if (!STREQ (j->str, k->str))
284 k->count++;
285 p = xmalloc (sizeof *p);
286 p->suc = k;
287 p->next = j->top;
288 j->top = p;
292 static bool
293 count_items (struct item *unused _GL_UNUSED)
295 n_strings++;
296 return false;
299 static bool
300 scan_zeros (struct item *k)
302 /* Ignore strings that have already been printed. */
303 if (k->count == 0 && k->str)
305 if (head == NULL)
306 head = k;
307 else
308 zeros->qlink = k;
310 zeros = k;
313 return false;
316 /* Try to detect the loop. If we have detected that K is part of a
317 loop, print the loop on standard error, remove a relation to break
318 the loop, and return true.
320 The loop detection strategy is as follows: Realise that what we're
321 dealing with is essentially a directed graph. If we find an item
322 that is part of a graph that contains a cycle we traverse the graph
323 in backwards direction. In general there is no unique way to do
324 this, but that is no problem. If we encounter an item that we have
325 encountered before, we know that we've found a cycle. All we have
326 to do now is retrace our steps, printing out the items until we
327 encounter that item again. (This is not necessarily the item that
328 we started from originally.) Since the order in which the items
329 are stored in the tree is not related to the specified partial
330 ordering, we may need to walk the tree several times before the
331 loop has completely been constructed. If the loop was found, the
332 global variable LOOP will be NULL. */
334 static bool
335 detect_loop (struct item *k)
337 if (k->count > 0)
339 /* K does not have to be part of a cycle. It is however part of
340 a graph that contains a cycle. */
342 if (loop == NULL)
343 /* Start traversing the graph at K. */
344 loop = k;
345 else
347 struct successor **p = &k->top;
349 while (*p)
351 if ((*p)->suc == loop)
353 if (k->qlink)
355 /* We have found a loop. Retrace our steps. */
356 while (loop)
358 struct item *tmp = loop->qlink;
360 error (0, 0, "%s", (loop->str));
362 /* Until we encounter K again. */
363 if (loop == k)
365 /* Remove relation. */
366 (*p)->suc->count--;
367 *p = (*p)->next;
368 break;
371 /* Tidy things up since we might have to
372 detect another loop. */
373 loop->qlink = NULL;
374 loop = tmp;
377 while (loop)
379 struct item *tmp = loop->qlink;
381 loop->qlink = NULL;
382 loop = tmp;
385 /* Since we have found the loop, stop walking
386 the tree. */
387 return true;
389 else
391 k->qlink = loop;
392 loop = k;
393 break;
397 p = &(*p)->next;
402 return false;
405 /* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
406 Stop when ACTION returns true. */
408 static bool
409 recurse_tree (struct item *root, bool (*action) (struct item *))
411 if (root->left == NULL && root->right == NULL)
412 return (*action) (root);
413 else
415 if (root->left != NULL)
416 if (recurse_tree (root->left, action))
417 return true;
418 if ((*action) (root))
419 return true;
420 if (root->right != NULL)
421 if (recurse_tree (root->right, action))
422 return true;
425 return false;
428 /* Walk the tree specified by the head ROOT, calling ACTION for
429 each node. */
431 static void
432 walk_tree (struct item *root, bool (*action) (struct item *))
434 if (root->right)
435 recurse_tree (root->right, action);
438 /* Do a topological sort on FILE. Return true if successful. */
440 static bool
441 tsort (const char *file)
443 bool ok = true;
444 struct item *root;
445 struct item *j = NULL;
446 struct item *k = NULL;
447 token_buffer tokenbuffer;
448 bool is_stdin = STREQ (file, "-");
450 /* Intialize the head of the tree will hold the strings we're sorting. */
451 root = new_item (NULL);
453 if (!is_stdin && ! freopen (file, "r", stdin))
454 die (EXIT_FAILURE, errno, "%s", quotef (file));
456 fadvise (stdin, FADVISE_SEQUENTIAL);
458 init_tokenbuffer (&tokenbuffer);
460 while (1)
462 /* T2. Next Relation. */
463 size_t len = readtoken (stdin, DELIM, sizeof (DELIM) - 1, &tokenbuffer);
464 if (len == (size_t) -1)
465 break;
467 assert (len != 0);
469 k = search_item (root, tokenbuffer.buffer);
470 if (j)
472 /* T3. Record the relation. */
473 record_relation (j, k);
474 k = NULL;
477 j = k;
480 if (k != NULL)
481 die (EXIT_FAILURE, 0, _("%s: input contains an odd number of tokens"),
482 quotef (file));
484 /* T1. Initialize (N <- n). */
485 walk_tree (root, count_items);
487 while (n_strings > 0)
489 /* T4. Scan for zeros. */
490 walk_tree (root, scan_zeros);
492 while (head)
494 struct successor *p = head->top;
496 /* T5. Output front of queue. */
497 puts (head->str);
498 #ifdef lint
499 /* suppress valgrind "definitely lost" warnings. */
500 void *head_str = (void *) head->str;
501 free (head_str);
502 #endif
503 head->str = NULL; /* Avoid printing the same string twice. */
504 n_strings--;
506 /* T6. Erase relations. */
507 while (p)
509 p->suc->count--;
510 if (p->suc->count == 0)
512 zeros->qlink = p->suc;
513 zeros = p->suc;
516 p = p->next;
519 /* T7. Remove from queue. */
520 head = head->qlink;
523 /* T8. End of process. */
524 if (n_strings > 0)
526 /* The input contains a loop. */
527 error (0, 0, _("%s: input contains a loop:"), quotef (file));
528 ok = false;
530 /* Print the loop and remove a relation to break it. */
532 walk_tree (root, detect_loop);
533 while (loop);
537 IF_LINT (free (root));
539 if (fclose (stdin) != 0)
540 die (EXIT_FAILURE, errno, "%s",
541 is_stdin ? _("standard input") : quotef (file));
543 return ok;
547 main (int argc, char **argv)
549 bool ok;
551 initialize_main (&argc, &argv);
552 set_program_name (argv[0]);
553 setlocale (LC_ALL, "");
554 bindtextdomain (PACKAGE, LOCALEDIR);
555 textdomain (PACKAGE);
557 atexit (close_stdout);
559 parse_long_options (argc, argv, PROGRAM_NAME, PACKAGE, Version,
560 usage, AUTHORS, (char const *) NULL);
561 if (getopt_long (argc, argv, "", long_options, NULL) != -1)
562 usage (EXIT_FAILURE);
564 if (1 < argc - optind)
566 error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
567 usage (EXIT_FAILURE);
570 ok = tsort (optind == argc ? "-" : argv[optind]);
572 return ok ? EXIT_SUCCESS : EXIT_FAILURE;