.
[coreutils.git] / src / tsort.c
blobbf680c5775c05669e0eedeac6cec267806bb10f8
1 /* tsort - topological sort.
2 Copyright (C) 1998-2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Mark Kettenis <kettenis@phys.uva.nl>. */
20 /* The topological sort is done according to Algorithm T (Topological
21 sort) in Donald E. Knuth, The Art of Computer Programming, Volume
22 1/Fundamental Algorithms, page 262. */
24 #include <config.h>
26 #include <stdio.h>
27 #include <assert.h>
28 #include <getopt.h>
29 #include <sys/types.h>
31 #include "system.h"
32 #include "long-options.h"
33 #include "error.h"
34 #include "readtokens.h"
36 /* The official name of this program (e.g., no `g' prefix). */
37 #define PROGRAM_NAME "tsort"
39 #define AUTHORS "Mark Kettenis"
41 /* Token delimiters when reading from a file. */
42 #define DELIM " \t\n"
44 /* Members of the list of successors. */
45 struct successor
47 struct item *suc;
48 struct successor *next;
51 /* Each string is held in core as the head of a list of successors. */
52 struct item
54 const char *str;
55 struct item *left, *right;
56 int balance;
57 int count;
58 struct item *qlink;
59 struct successor *top;
62 /* The name this program was run with. */
63 char *program_name;
65 /* Nonzero if any of the input files are the standard input. */
66 static int have_read_stdin;
68 /* The error code to return to the system. */
69 static int exit_status;
71 /* The head of the sorted list. */
72 static struct item *head = NULL;
74 /* The tail of the list of `zeros', strings that have no predecessors. */
75 static struct item *zeros = NULL;
77 /* Used for loop detection. */
78 static struct item *loop = NULL;
80 /* The number of strings to sort. */
81 static int n_strings = 0;
83 static struct option const long_options[] =
85 { NULL, 0, NULL, 0}
88 void
89 usage (int status)
91 if (status != 0)
92 fprintf (stderr, _("Try `%s --help' for more information.\n"),
93 program_name);
94 else
96 printf (_("\
97 Usage: %s [OPTION] [FILE]\n\
98 Write totally ordered list consistent with the partial ordering in FILE.\n\
99 With no FILE, or when FILE is -, read standard input.\n\
101 "), program_name);
102 fputs (HELP_OPTION_DESCRIPTION, stdout);
103 fputs (VERSION_OPTION_DESCRIPTION, stdout);
104 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
107 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
110 /* Create a new item/node for STR. */
111 static struct item *
112 new_item (const char *str)
114 struct item *k = xmalloc (sizeof *k);
116 k->str = (str ? xstrdup (str): NULL);
117 k->left = k->right = NULL;
118 k->balance = 0;
120 /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^). */
121 k->count = 0;
122 k->qlink = NULL;
123 k->top = NULL;
125 return k;
128 /* Search binary tree rooted at *ROOT for STR. Allocate a new tree if
129 *ROOT is NULL. Insert a node/item for STR if not found. Return
130 the node/item found/created for STR.
132 This is done according to Algorithm A (Balanced tree search and
133 insertion) in Donald E. Knuth, The Art of Computer Programming,
134 Volume 3/Searching and Sorting, pages 455--457. */
136 static struct item *
137 search_item (struct item *root, const char *str)
139 struct item *p, *q, *r, *s, *t;
140 int a;
142 assert (root);
144 /* Make sure the tree is not empty, since that is what the algorithm
145 below expects. */
146 if (root->right == NULL)
147 return (root->right = new_item (str));
149 /* A1. Initialize. */
150 t = root;
151 s = p = root->right;
153 for (;;)
155 /* A2. Compare. */
156 a = strcmp (str, p->str);
157 if (a == 0)
158 return p;
160 /* A3 & A4. Move left & right. */
161 if (a < 0)
162 q = p->left;
163 else
164 q = p->right;
166 if (q == NULL)
168 /* A5. Insert. */
169 q = new_item (str);
171 /* A3 & A4. (continued). */
172 if (a < 0)
173 p->left = q;
174 else
175 p->right = q;
177 /* A6. Adjust balance factors. */
178 assert (!STREQ (str, s->str));
179 if (strcmp (str, s->str) < 0)
181 r = p = s->left;
182 a = -1;
184 else
186 r = p = s->right;
187 a = 1;
190 while (p != q)
192 assert (!STREQ (str, p->str));
193 if (strcmp (str, p->str) < 0)
195 p->balance = -1;
196 p = p->left;
198 else
200 p->balance = 1;
201 p = p->right;
205 /* A7. Balancing act. */
206 if (s->balance == 0 || s->balance == -a)
208 s->balance += a;
209 return q;
212 if (r->balance == a)
214 /* A8. Single Rotation. */
215 p = r;
216 if (a < 0)
218 s->left = r->right;
219 r->right = s;
221 else
223 s->right = r->left;
224 r->left = s;
226 s->balance = r->balance = 0;
228 else
230 /* A9. Double rotation. */
231 if (a < 0)
233 p = r->right;
234 r->right = p->left;
235 p->left = r;
236 s->left = p->right;
237 p->right = s;
239 else
241 p = r->left;
242 r->left = p->right;
243 p->right = r;
244 s->right = p->left;
245 p->left = s;
248 s->balance = 0;
249 r->balance = 0;
250 if (p->balance == a)
251 s->balance = -a;
252 else if (p->balance == -a)
253 r->balance = a;
254 p->balance = 0;
257 /* A10. Finishing touch. */
258 if (s == t->right)
259 t->right = p;
260 else
261 t->left = p;
263 return q;
266 /* A3 & A4. (continued). */
267 if (q->balance)
269 t = p;
270 s = q;
273 p = q;
276 /* NOTREACHED */
279 /* Record the fact that J precedes K. */
281 static void
282 record_relation (struct item *j, struct item *k)
284 struct successor *p;
286 if (!STREQ (j->str, k->str))
288 k->count++;
289 p = xmalloc (sizeof *p);
290 p->suc = k;
291 p->next = j->top;
292 j->top = p;
296 static int
297 count_items (struct item *unused ATTRIBUTE_UNUSED)
299 n_strings++;
300 return 0;
303 static int
304 scan_zeros (struct item *k)
306 /* Ignore strings that have already been printed. */
307 if (k->count == 0 && k->str)
309 if (head == NULL)
310 head = k;
311 else
312 zeros->qlink = k;
314 zeros = k;
317 return 0;
320 /* Try to detect the loop. If we have detected that K is part of a
321 loop, print the loop on standard error, remove a relation to break
322 the loop, and return non-zero.
324 The loop detection strategy is as follows: Realise that what we're
325 dealing with is essentially a directed graph. If we find an item
326 that is part of a graph that contains a cycle we traverse the graph
327 in backwards direction. In general there is no unique way to do
328 this, but that is no problem. If we encounter an item that we have
329 encountered before, we know that we've found a cycle. All we have
330 to do now is retrace our steps, printing out the items until we
331 encounter that item again. (This is not necessarily the item that
332 we started from originally.) Since the order in which the items
333 are stored in the tree is not related to the specified partial
334 ordering, we may need to walk the tree several times before the
335 loop has completely been constructed. If the loop was found, the
336 global variable LOOP will be NULL. */
338 static int
339 detect_loop (struct item *k)
341 if (k->count > 0)
343 /* K does not have to be part of a cycle. It is however part of
344 a graph that contains a cycle. */
346 if (loop == NULL)
347 /* Start traversing the graph at K. */
348 loop = k;
349 else
351 struct successor **p = &k->top;
353 while (*p)
355 if ((*p)->suc == loop)
357 if (k->qlink)
359 /* We have found a loop. Retrace our steps. */
360 while (loop)
362 struct item *tmp = loop->qlink;
364 fprintf (stderr, "%s: %s\n", program_name,
365 loop->str);
367 /* Until we encounter K again. */
368 if (loop == k)
370 /* Remove relation. */
371 (*p)->suc->count--;
372 *p = (*p)->next;
373 break;
376 /* Tidy things up since we might have to
377 detect another loop. */
378 loop->qlink = NULL;
379 loop = tmp;
382 while (loop)
384 struct item *tmp = loop->qlink;
386 loop->qlink = NULL;
387 loop = tmp;
390 /* Since we have found the loop, stop walking
391 the tree. */
392 return 1;
394 else
396 k->qlink = loop;
397 loop = k;
398 break;
402 p = &(*p)->next;
407 return 0;
410 /* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
411 Stop when ACTION returns non-zero. */
413 static int
414 recurse_tree (struct item *root, int (*action) (struct item *))
416 if (root->left == NULL && root->right == NULL)
417 return (*action) (root);
418 else
420 if (root->left != NULL)
421 if (recurse_tree (root->left, action))
422 return 1;
423 if ((*action) (root))
424 return 1;
425 if (root->right != NULL)
426 if (recurse_tree (root->right, action))
427 return 1;
430 return 0;
433 /* Walk the tree specified by the head ROOT, calling ACTION for
434 each node. */
436 static void
437 walk_tree (struct item *root, int (*action) (struct item *))
439 if (root->right)
440 recurse_tree (root->right, action);
443 /* Do a topological sort on FILE. */
445 static void
446 tsort (const char *file)
448 struct item *root;
449 struct item *j = NULL;
450 struct item *k = NULL;
451 register FILE *fp;
452 token_buffer tokenbuffer;
454 /* Intialize the head of the tree will hold the strings we're sorting. */
455 root = new_item (NULL);
457 if (STREQ (file, "-"))
459 fp = stdin;
460 have_read_stdin = 1;
462 else
464 fp = fopen (file, "r");
465 if (fp == NULL)
466 error (EXIT_FAILURE, errno, "%s", file);
469 init_tokenbuffer (&tokenbuffer);
471 while (1)
473 long int len;
475 /* T2. Next Relation. */
476 len = readtoken (fp, DELIM, sizeof (DELIM) - 1, &tokenbuffer);
477 if (len < 0)
478 break;
480 assert (len != 0);
482 k = search_item (root, tokenbuffer.buffer);
483 if (j)
485 /* T3. Record the relation. */
486 record_relation (j, k);
487 k = NULL;
490 j = k;
493 if (k != NULL)
494 error (EXIT_FAILURE, 0, _("%s: input contains an odd number of tokens"),
495 file);
497 /* T1. Initialize (N <- n). */
498 walk_tree (root, count_items);
500 while (n_strings > 0)
502 /* T4. Scan for zeros. */
503 walk_tree (root, scan_zeros);
505 while (head)
507 struct successor *p = head->top;
509 /* T5. Output front of queue. */
510 printf ("%s\n", head->str);
511 head->str = NULL; /* Avoid printing the same string twice. */
512 n_strings--;
514 /* T6. Erase relations. */
515 while (p)
517 p->suc->count--;
518 if (p->suc->count == 0)
520 zeros->qlink = p->suc;
521 zeros = p->suc;
524 p = p->next;
527 /* T7. Remove from queue. */
528 head = head->qlink;
531 /* T8. End of process. */
532 assert (n_strings >= 0);
533 if (n_strings > 0)
535 /* The input contains a loop. */
536 error (0, 0, _("%s: input contains a loop:"), file);
537 exit_status = 1;
539 /* Print the loop and remove a relation to break it. */
541 walk_tree (root, detect_loop);
542 while (loop);
548 main (int argc, char **argv)
550 int opt;
552 initialize_main (&argc, &argv);
553 program_name = argv[0];
554 setlocale (LC_ALL, "");
555 bindtextdomain (PACKAGE, LOCALEDIR);
556 textdomain (PACKAGE);
558 atexit (close_stdout);
560 exit_status = 0;
562 parse_long_options (argc, argv, PROGRAM_NAME, PACKAGE, VERSION,
563 usage, AUTHORS, (char const *) NULL);
565 while ((opt = getopt_long (argc, argv, "", long_options, NULL)) != -1)
566 switch (opt)
568 case 0: /* long option */
569 break;
570 default:
571 usage (EXIT_FAILURE);
574 have_read_stdin = 0;
576 if (1 < argc - optind)
578 error (0, 0, _("only one argument may be specified"));
579 usage (EXIT_FAILURE);
582 tsort (optind == argc ? "-" : argv[optind]);
584 if (have_read_stdin && fclose (stdin) == EOF)
585 error (EXIT_FAILURE, errno, _("standard input"));
587 exit (exit_status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);