version 8.7
[coreutils.git] / src / tsort.c
blob4f51f3074bf09f82f940d275c53d4266ec9862ee
1 /* tsort - topological sort.
2 Copyright (C) 1998-2005, 2007-2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Mark Kettenis <kettenis@phys.uva.nl>. */
19 /* The topological sort is done according to Algorithm T (Topological
20 sort) in Donald E. Knuth, The Art of Computer Programming, Volume
21 1/Fundamental Algorithms, page 262. */
23 #include <config.h>
25 #include <assert.h>
26 #include <getopt.h>
27 #include <sys/types.h>
29 #include "system.h"
30 #include "long-options.h"
31 #include "error.h"
32 #include "fadvise.h"
33 #include "quote.h"
34 #include "readtokens.h"
35 #include "stdio--.h"
37 /* The official name of this program (e.g., no `g' prefix). */
38 #define PROGRAM_NAME "tsort"
40 #define AUTHORS proper_name ("Mark Kettenis")
42 /* Token delimiters when reading from a file. */
43 #define DELIM " \t\n"
45 /* Members of the list of successors. */
46 struct successor
48 struct item *suc;
49 struct successor *next;
52 /* Each string is held in core as the head of a list of successors. */
53 struct item
55 const char *str;
56 struct item *left, *right;
57 int balance; /* -1, 0, or +1 */
58 size_t count;
59 struct item *qlink;
60 struct successor *top;
63 /* The head of the sorted list. */
64 static struct item *head = NULL;
66 /* The tail of the list of `zeros', strings that have no predecessors. */
67 static struct item *zeros = NULL;
69 /* Used for loop detection. */
70 static struct item *loop = NULL;
72 /* The number of strings to sort. */
73 static size_t n_strings = 0;
75 void
76 usage (int status)
78 if (status != EXIT_SUCCESS)
79 fprintf (stderr, _("Try `%s --help' for more information.\n"),
80 program_name);
81 else
83 printf (_("\
84 Usage: %s [OPTION] [FILE]\n\
85 Write totally ordered list consistent with the partial ordering in FILE.\n\
86 With no FILE, or when FILE is -, read standard input.\n\
87 \n\
88 "), program_name);
89 fputs (HELP_OPTION_DESCRIPTION, stdout);
90 fputs (VERSION_OPTION_DESCRIPTION, stdout);
91 emit_ancillary_info ();
94 exit (status);
97 /* Create a new item/node for STR. */
98 static struct item *
99 new_item (const char *str)
101 struct item *k = xmalloc (sizeof *k);
103 k->str = (str ? xstrdup (str): NULL);
104 k->left = k->right = NULL;
105 k->balance = 0;
107 /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^). */
108 k->count = 0;
109 k->qlink = NULL;
110 k->top = NULL;
112 return k;
115 /* Search binary tree rooted at *ROOT for STR. Allocate a new tree if
116 *ROOT is NULL. Insert a node/item for STR if not found. Return
117 the node/item found/created for STR.
119 This is done according to Algorithm A (Balanced tree search and
120 insertion) in Donald E. Knuth, The Art of Computer Programming,
121 Volume 3/Searching and Sorting, pages 455--457. */
123 static struct item *
124 search_item (struct item *root, const char *str)
126 struct item *p, *q, *r, *s, *t;
127 int a;
129 assert (root);
131 /* Make sure the tree is not empty, since that is what the algorithm
132 below expects. */
133 if (root->right == NULL)
134 return (root->right = new_item (str));
136 /* A1. Initialize. */
137 t = root;
138 s = p = root->right;
140 while (true)
142 /* A2. Compare. */
143 a = strcmp (str, p->str);
144 if (a == 0)
145 return p;
147 /* A3 & A4. Move left & right. */
148 if (a < 0)
149 q = p->left;
150 else
151 q = p->right;
153 if (q == NULL)
155 /* A5. Insert. */
156 q = new_item (str);
158 /* A3 & A4. (continued). */
159 if (a < 0)
160 p->left = q;
161 else
162 p->right = q;
164 /* A6. Adjust balance factors. */
165 assert (!STREQ (str, s->str));
166 if (strcmp (str, s->str) < 0)
168 r = p = s->left;
169 a = -1;
171 else
173 r = p = s->right;
174 a = 1;
177 while (p != q)
179 assert (!STREQ (str, p->str));
180 if (strcmp (str, p->str) < 0)
182 p->balance = -1;
183 p = p->left;
185 else
187 p->balance = 1;
188 p = p->right;
192 /* A7. Balancing act. */
193 if (s->balance == 0 || s->balance == -a)
195 s->balance += a;
196 return q;
199 if (r->balance == a)
201 /* A8. Single Rotation. */
202 p = r;
203 if (a < 0)
205 s->left = r->right;
206 r->right = s;
208 else
210 s->right = r->left;
211 r->left = s;
213 s->balance = r->balance = 0;
215 else
217 /* A9. Double rotation. */
218 if (a < 0)
220 p = r->right;
221 r->right = p->left;
222 p->left = r;
223 s->left = p->right;
224 p->right = s;
226 else
228 p = r->left;
229 r->left = p->right;
230 p->right = r;
231 s->right = p->left;
232 p->left = s;
235 s->balance = 0;
236 r->balance = 0;
237 if (p->balance == a)
238 s->balance = -a;
239 else if (p->balance == -a)
240 r->balance = a;
241 p->balance = 0;
244 /* A10. Finishing touch. */
245 if (s == t->right)
246 t->right = p;
247 else
248 t->left = p;
250 return q;
253 /* A3 & A4. (continued). */
254 if (q->balance)
256 t = p;
257 s = q;
260 p = q;
263 /* NOTREACHED */
266 /* Record the fact that J precedes K. */
268 static void
269 record_relation (struct item *j, struct item *k)
271 struct successor *p;
273 if (!STREQ (j->str, k->str))
275 k->count++;
276 p = xmalloc (sizeof *p);
277 p->suc = k;
278 p->next = j->top;
279 j->top = p;
283 static bool
284 count_items (struct item *unused ATTRIBUTE_UNUSED)
286 n_strings++;
287 return false;
290 static bool
291 scan_zeros (struct item *k)
293 /* Ignore strings that have already been printed. */
294 if (k->count == 0 && k->str)
296 if (head == NULL)
297 head = k;
298 else
299 zeros->qlink = k;
301 zeros = k;
304 return false;
307 /* Try to detect the loop. If we have detected that K is part of a
308 loop, print the loop on standard error, remove a relation to break
309 the loop, and return true.
311 The loop detection strategy is as follows: Realise that what we're
312 dealing with is essentially a directed graph. If we find an item
313 that is part of a graph that contains a cycle we traverse the graph
314 in backwards direction. In general there is no unique way to do
315 this, but that is no problem. If we encounter an item that we have
316 encountered before, we know that we've found a cycle. All we have
317 to do now is retrace our steps, printing out the items until we
318 encounter that item again. (This is not necessarily the item that
319 we started from originally.) Since the order in which the items
320 are stored in the tree is not related to the specified partial
321 ordering, we may need to walk the tree several times before the
322 loop has completely been constructed. If the loop was found, the
323 global variable LOOP will be NULL. */
325 static bool
326 detect_loop (struct item *k)
328 if (k->count > 0)
330 /* K does not have to be part of a cycle. It is however part of
331 a graph that contains a cycle. */
333 if (loop == NULL)
334 /* Start traversing the graph at K. */
335 loop = k;
336 else
338 struct successor **p = &k->top;
340 while (*p)
342 if ((*p)->suc == loop)
344 if (k->qlink)
346 /* We have found a loop. Retrace our steps. */
347 while (loop)
349 struct item *tmp = loop->qlink;
351 fprintf (stderr, "%s: %s\n", program_name,
352 loop->str);
354 /* Until we encounter K again. */
355 if (loop == k)
357 /* Remove relation. */
358 (*p)->suc->count--;
359 *p = (*p)->next;
360 break;
363 /* Tidy things up since we might have to
364 detect another loop. */
365 loop->qlink = NULL;
366 loop = tmp;
369 while (loop)
371 struct item *tmp = loop->qlink;
373 loop->qlink = NULL;
374 loop = tmp;
377 /* Since we have found the loop, stop walking
378 the tree. */
379 return true;
381 else
383 k->qlink = loop;
384 loop = k;
385 break;
389 p = &(*p)->next;
394 return false;
397 /* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
398 Stop when ACTION returns true. */
400 static bool
401 recurse_tree (struct item *root, bool (*action) (struct item *))
403 if (root->left == NULL && root->right == NULL)
404 return (*action) (root);
405 else
407 if (root->left != NULL)
408 if (recurse_tree (root->left, action))
409 return true;
410 if ((*action) (root))
411 return true;
412 if (root->right != NULL)
413 if (recurse_tree (root->right, action))
414 return true;
417 return false;
420 /* Walk the tree specified by the head ROOT, calling ACTION for
421 each node. */
423 static void
424 walk_tree (struct item *root, bool (*action) (struct item *))
426 if (root->right)
427 recurse_tree (root->right, action);
430 /* Do a topological sort on FILE. Return true if successful. */
432 static bool
433 tsort (const char *file)
435 bool ok = true;
436 struct item *root;
437 struct item *j = NULL;
438 struct item *k = NULL;
439 token_buffer tokenbuffer;
440 bool is_stdin = STREQ (file, "-");
442 /* Intialize the head of the tree will hold the strings we're sorting. */
443 root = new_item (NULL);
445 if (!is_stdin && ! freopen (file, "r", stdin))
446 error (EXIT_FAILURE, errno, "%s", file);
448 fadvise (stdin, FADVISE_SEQUENTIAL);
450 init_tokenbuffer (&tokenbuffer);
452 while (1)
454 /* T2. Next Relation. */
455 size_t len = readtoken (stdin, DELIM, sizeof (DELIM) - 1, &tokenbuffer);
456 if (len == (size_t) -1)
457 break;
459 assert (len != 0);
461 k = search_item (root, tokenbuffer.buffer);
462 if (j)
464 /* T3. Record the relation. */
465 record_relation (j, k);
466 k = NULL;
469 j = k;
472 if (k != NULL)
473 error (EXIT_FAILURE, 0, _("%s: input contains an odd number of tokens"),
474 file);
476 /* T1. Initialize (N <- n). */
477 walk_tree (root, count_items);
479 while (n_strings > 0)
481 /* T4. Scan for zeros. */
482 walk_tree (root, scan_zeros);
484 while (head)
486 struct successor *p = head->top;
488 /* T5. Output front of queue. */
489 puts (head->str);
490 head->str = NULL; /* Avoid printing the same string twice. */
491 n_strings--;
493 /* T6. Erase relations. */
494 while (p)
496 p->suc->count--;
497 if (p->suc->count == 0)
499 zeros->qlink = p->suc;
500 zeros = p->suc;
503 p = p->next;
506 /* T7. Remove from queue. */
507 head = head->qlink;
510 /* T8. End of process. */
511 if (n_strings > 0)
513 /* The input contains a loop. */
514 error (0, 0, _("%s: input contains a loop:"), file);
515 ok = false;
517 /* Print the loop and remove a relation to break it. */
519 walk_tree (root, detect_loop);
520 while (loop);
524 if (fclose (stdin) != 0)
525 error (EXIT_FAILURE, errno, "%s",
526 is_stdin ? _("standard input") : quote (file));
528 return ok;
532 main (int argc, char **argv)
534 bool ok;
536 initialize_main (&argc, &argv);
537 set_program_name (argv[0]);
538 setlocale (LC_ALL, "");
539 bindtextdomain (PACKAGE, LOCALEDIR);
540 textdomain (PACKAGE);
542 atexit (close_stdout);
544 parse_long_options (argc, argv, PROGRAM_NAME, PACKAGE, Version,
545 usage, AUTHORS, (char const *) NULL);
546 if (getopt_long (argc, argv, "", NULL, NULL) != -1)
547 usage (EXIT_FAILURE);
549 if (1 < argc - optind)
551 error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
552 usage (EXIT_FAILURE);
555 ok = tsort (optind == argc ? "-" : argv[optind]);
557 exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);