*** empty log message ***
[coreutils.git] / src / tsort.c
blob612713925f345f49ef9d6eedf630d7c9305332cd
1 /* tsort - topological sort.
2 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Mark Kettenis <kettenis@phys.uva.nl>. */
20 /* The topological sort is done according to Algorithm T (Topological
21 sort) in Donald E. Knuth, The Art of Computer Programming, Volume
22 1/Fundamental Algorithms, page 262. */
24 #include <config.h>
26 #include <stdio.h>
27 #include <assert.h>
28 #include <getopt.h>
29 #include <sys/types.h>
31 #include "system.h"
32 #include "closeout.h"
33 #include "long-options.h"
34 #include "error.h"
35 #include "readtokens.h"
37 /* The official name of this program (e.g., no `g' prefix). */
38 #define PROGRAM_NAME "tsort"
40 #define AUTHORS "Mark Kettenis"
42 /* Token delimiters when reading from a file. */
43 #define DELIM " \t\n"
45 /* Members of the list of successors. */
46 struct successor
48 struct item *suc;
49 struct successor *next;
52 /* Each string is held in core as the head of a list of successors. */
53 struct item
55 const char *str;
56 struct item *left, *right;
57 int balance;
58 int count;
59 struct item *qlink;
60 struct successor *top;
63 /* The name this program was run with. */
64 char *program_name;
66 /* Nonzero if any of the input files are the standard input. */
67 static int have_read_stdin;
69 /* The error code to return to the system. */
70 static int exit_status;
72 /* The head of the sorted list. */
73 static struct item *head = NULL;
75 /* The tail of the list of `zeros', strings that have no predecessors. */
76 static struct item *zeros = NULL;
78 /* Used for loop detection. */
79 static struct item *loop = NULL;
81 /* The number of strings to sort. */
82 static int n_strings = 0;
84 static struct option const long_options[] =
86 { NULL, 0, NULL, 0}
89 void
90 usage (int status)
92 if (status != 0)
93 fprintf (stderr, _("Try `%s --help' for more information.\n"),
94 program_name);
95 else
97 printf (_("\
98 Usage: %s [OPTION] [FILE]\n\
99 Write totally ordered list consistent with the partial ordering in FILE.\n\
100 With no FILE, or when FILE is -, read standard input.\n\
102 --help display this help and exit\n\
103 --version output version information and exit\n"),
104 program_name);
105 puts (_("\nReport bugs to <bug-textutils@gnu.org>."));
108 exit (status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
111 /* Create a new item/node for STR. */
112 static struct item *
113 new_item (const char *str)
115 struct item *k = xmalloc (sizeof (struct item));
117 k->str = (str ? xstrdup (str): NULL);
118 k->left = k->right = NULL;
119 k->balance = 0;
121 /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^). */
122 k->count = 0;
123 k->qlink = NULL;
124 k->top = NULL;
126 return k;
129 /* Search binary tree rooted at *ROOT for STR. Allocate a new tree if
130 *ROOT is NULL. Insert a node/item for STR if not found. Return
131 the node/item found/created for STR.
133 This is done according to Algorithm A (Balanced tree search and
134 insertion) in Donald E. Knuth, The Art of Computer Programming,
135 Volume 3/Searching and Sorting, pages 455--457. */
137 static struct item *
138 search_item (struct item *root, const char *str)
140 struct item *p, *q, *r, *s, *t;
141 int a;
143 assert (root);
145 /* Make sure the tree is not empty, since that is what the algorithm
146 below expects. */
147 if (root->right == NULL)
148 return (root->right = new_item (str));
150 /* A1. Initialize. */
151 t = root;
152 s = p = root->right;
154 for (;;)
156 /* A2. Compare. */
157 a = strcmp (str, p->str);
158 if (a == 0)
159 return p;
161 /* A3 & A4. Move left & right. */
162 if (a < 0)
163 q = p->left;
164 else
165 q = p->right;
167 if (q == NULL)
169 /* A5. Insert. */
170 q = new_item (str);
172 /* A3 & A4. (continued). */
173 if (a < 0)
174 p->left = q;
175 else
176 p->right = q;
178 /* A6. Adjust balance factors. */
179 assert (!STREQ (str, s->str));
180 if (strcmp (str, s->str) < 0)
182 r = p = s->left;
183 a = -1;
185 else
187 r = p = s->right;
188 a = 1;
191 while (p != q)
193 assert (!STREQ (str, p->str));
194 if (strcmp (str, p->str) < 0)
196 p->balance = -1;
197 p = p->left;
199 else
201 p->balance = 1;
202 p = p->right;
206 /* A7. Balancing act. */
207 if (s->balance == 0 || s->balance == -a)
209 s->balance += a;
210 return q;
213 if (r->balance == a)
215 /* A8. Single Rotation. */
216 p = r;
217 if (a < 0)
219 s->left = r->right;
220 r->right = s;
222 else
224 s->right = r->left;
225 r->left = s;
227 s->balance = r->balance = 0;
229 else
231 /* A9. Double rotation. */
232 if (a < 0)
234 p = r->right;
235 r->right = p->left;
236 p->left = r;
237 s->left = p->right;
238 p->right = s;
240 else
242 p = r->left;
243 r->left = p->right;
244 p->right = r;
245 s->right = p->left;
246 p->left = s;
249 s->balance = 0;
250 r->balance = 0;
251 if (p->balance == a)
252 s->balance = -a;
253 else if (p->balance == -a)
254 r->balance = a;
255 p->balance = 0;
258 /* A10. Finishing touch. */
259 if (s == t->right)
260 t->right = p;
261 else
262 t->left = p;
264 return q;
267 /* A3 & A4. (continued). */
268 if (q->balance)
270 t = p;
271 s = q;
274 p = q;
277 /* NOTREACHED */
280 /* Record the fact that J precedes K. */
282 static void
283 record_relation (struct item *j, struct item *k)
285 struct successor *p;
287 if (!STREQ (j->str, k->str))
289 k->count++;
290 p = xmalloc (sizeof (struct successor));
291 p->suc = k;
292 p->next = j->top;
293 j->top = p;
297 static int
298 count_items (struct item *k)
300 n_strings++;
301 return 0;
304 static int
305 scan_zeros (struct item *k)
307 /* Ignore strings that have already been printed. */
308 if (k->count == 0 && k->str)
310 if (head == NULL)
311 head = k;
312 else
313 zeros->qlink = k;
315 zeros = k;
318 return 0;
321 /* Try to detect the loop. If we have detected that K is part of a
322 loop, print the loop on standard error, remove a relation to break
323 the loop, and return non-zero.
325 The loop detection strategy is as follows: Realise that what we're
326 dealing with is essentially a directed graph. If we find an item
327 that is part of a graph that contains a cycle we traverse the graph
328 in backwards direction. In general there is no unique way to do
329 this, but that is no problem. If we encounter an item that we have
330 encountered before, we know that we've found a cycle. All we have
331 to do now is retrace our steps, printing out the items until we
332 encounter that item again. (This is not necessarily the item that
333 we started from originally.) Since the order in which the items
334 are stored in the tree is not related to the specified partial
335 ordering, we may need to walk the tree several times before the
336 loop has completely been constructed. If the loop was found, the
337 global variable LOOP will be NULL. */
339 static int
340 detect_loop (struct item *k)
342 if (k->count > 0)
344 /* K does not have to be part of a cycle. It is however part of
345 a graph that contains a cycle. */
347 if (loop == NULL)
348 /* Start traversing the graph at K. */
349 loop = k;
350 else
352 struct successor **p = &k->top;
354 while (*p)
356 if ((*p)->suc == loop)
358 if (k->qlink)
360 /* We have found a loop. Retrace our steps. */
361 while (loop)
363 struct item *tmp = loop->qlink;
365 fprintf (stderr, "%s: %s\n", program_name,
366 loop->str);
368 /* Until we encounter K again. */
369 if (loop == k)
371 /* Remove relation. */
372 (*p)->suc->count--;
373 *p = (*p)->next;
374 break;
377 /* Tidy things up since we might have to
378 detect another loop. */
379 loop->qlink = NULL;
380 loop = tmp;
383 while (loop)
385 struct item *tmp = loop->qlink;
387 loop->qlink = NULL;
388 loop = tmp;
391 /* Since we have found the loop, stop walking
392 the tree. */
393 return 1;
395 else
397 k->qlink = loop;
398 loop = k;
399 break;
403 p = &(*p)->next;
408 return 0;
411 /* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
412 Stop when ACTION returns non-zero. */
414 static int
415 recurse_tree (struct item *root, int (*action) (struct item *))
417 if (root->left == NULL && root->right == NULL)
418 return (*action) (root);
419 else
421 if (root->left != NULL)
422 if (recurse_tree (root->left, action))
423 return 1;
424 if ((*action) (root))
425 return 1;
426 if (root->right != NULL)
427 if (recurse_tree (root->right, action))
428 return 1;
431 return 0;
434 /* Walk the tree specified by the head ROOT, calling ACTION for
435 each node. */
437 static void
438 walk_tree (struct item *root, int (*action) (struct item *))
440 if (root->right)
441 recurse_tree (root->right, action);
444 /* Do a topological sort on FILE. */
446 static void
447 tsort (const char *file)
449 struct item *root;
450 struct item *j = NULL;
451 struct item *k = NULL;
452 register FILE *fp;
453 token_buffer tokenbuffer;
455 /* Intialize the head of the tree will hold the strings we're sorting. */
456 root = new_item (NULL);
458 if (STREQ (file, "-"))
460 fp = stdin;
461 have_read_stdin = 1;
463 else
465 fp = fopen (file, "r");
466 if (fp == NULL)
467 error (EXIT_FAILURE, errno, "%s", file);
470 init_tokenbuffer (&tokenbuffer);
472 while (1)
474 long int len;
476 /* T2. Next Relation. */
477 len = readtoken (fp, DELIM, sizeof (DELIM) - 1, &tokenbuffer);
478 if (len < 0)
479 break;
481 assert (len != 0);
483 k = search_item (root, tokenbuffer.buffer);
484 if (j)
486 /* T3. Record the relation. */
487 record_relation (j, k);
488 k = NULL;
491 j = k;
494 /* T1. Initialize (N <- n). */
495 walk_tree (root, count_items);
497 while (n_strings > 0)
499 /* T4. Scan for zeros. */
500 walk_tree (root, scan_zeros);
502 while (head)
504 struct successor *p = head->top;
506 /* T5. Output front of queue. */
507 printf ("%s\n", head->str);
508 head->str = NULL; /* Avoid printing the same string twice. */
509 n_strings--;
511 /* T6. Erase relations. */
512 while (p)
514 p->suc->count--;
515 if (p->suc->count == 0)
517 zeros->qlink = p->suc;
518 zeros = p->suc;
521 p = p->next;
524 /* T7. Remove from queue. */
525 head = head->qlink;
528 /* T8. End of process. */
529 assert (n_strings >= 0);
530 if (n_strings > 0)
532 /* The input contains a loop. */
533 error (0, 0, _("%s: input contains a loop:"),
534 (have_read_stdin ? "-" : file));
535 exit_status = 1;
537 /* Print the loop and remove a relation to break it. */
539 walk_tree (root, detect_loop);
540 while (loop);
546 main (int argc, char **argv)
548 int opt;
550 program_name = argv[0];
551 setlocale (LC_ALL, "");
552 bindtextdomain (PACKAGE, LOCALEDIR);
553 textdomain (PACKAGE);
555 atexit (close_stdout);
557 exit_status = 0;
559 parse_long_options (argc, argv, PROGRAM_NAME, PACKAGE, VERSION,
560 AUTHORS, usage);
562 while ((opt = getopt_long (argc, argv, "", long_options, NULL)) != -1)
563 switch (opt)
565 case 0: /* long option */
566 break;
567 default:
568 usage (EXIT_FAILURE);
571 have_read_stdin = 0;
573 if (optind + 1 < argc)
575 error (0, 0, _("only one argument may be specified"));
576 usage (EXIT_FAILURE);
579 if (optind < argc)
580 tsort (argv[optind]);
581 else
582 tsort ("-");
584 if (have_read_stdin && fclose (stdin) == EOF)
585 error (EXIT_FAILURE, errno, _("standard input"));
587 exit (exit_status == 0 ? EXIT_SUCCESS : EXIT_FAILURE);