Bug499183 - FreeBSD: differences in avx-vmovq output
[valgrind.git] / cachegrind / docs / concord.c
blob7ebdbea654ac680926947010445308f8a58c4207
1 /********************************************************************************
2 ** Program: concord.c
3 ** By: Nick Nethercote, 36448. Any code taken from elsewhere as noted.
4 ** For: 433-253 assignment 3.
5 **
6 ** Program description: This program is a tool for finding specific
7 ** occurrences of words in a text; it can count the number of times a single
8 ** word appears, or list the lines that a word, or multiple words, all appear
9 ** on. See the project specification for more detail.
10 ** The primary data structure used is a static hash table, of fixed size.
11 ** Any collisions of words hashing to the same position in the table are
12 ** dealt with via separate chaining. Also, for each word, there is a
13 ** subsidiary linked list containing the line numbers that the word appears on.
14 ** Thus there are linked lists within linked lists.
15 ** I have implemented the entire program within one file, partly because
16 ** there isn't a great deal of code, and partly because I haven't yet done
17 ** 433-252, and thus don't know a great deal about .h files, makefiles, etc.
20 #include <stdio.h>
21 #include <ctype.h>
22 #include <stdlib.h>
23 #include <string.h>
25 #define TRUE 1
26 #define FALSE 0
27 #define MAX_WORD_LENGTH 100
28 #define DUMMY_WORD_LENGTH 2
29 #define TABLE_SIZE 997
30 #define BEFORE_WORD 1
31 #define IN_WORD 2
32 #define AFTER_WORD 3
33 #define HASH_CONSTANT 256
34 #define ARGS_NUMBER 1
36 typedef struct word_node Word_Node;
37 typedef struct line_node Line_Node;
38 typedef struct word_info Word_Info;
39 typedef struct arg_node Arg_Node;
41 /* Linked list node for storing each word */
42 struct word_node {
43 char *word; /* The actual word */
44 int number; /* The number of occurrences */
45 Line_Node *line_list; /* Points to the linked list of line numbers */
46 Line_Node *last_line; /* Points to the last line node, for easy append */
47 Word_Node *next_word; /* Next node in list */
50 /* Subsidiary linked list node for storing line numbers */
51 struct line_node {
52 int line;
53 Line_Node *next_line;
56 /* Structure used when reading each word, and it line number, from file. */
57 struct word_info {
58 char word[MAX_WORD_LENGTH];
59 int line;
62 /* Linked list node used for holding multiple arguments from the program's
63 ** internal command line. Also, can point to a list of line numbers; this
64 ** is used when displaying line numbers.
65 */
66 struct arg_node {
67 char *word;
68 Line_Node *line_list;
69 Arg_Node *next_arg;
72 int hash(char *word);
73 void *create(int mem_size);
74 void init_hash_table(char *file_name, Word_Node *table[]);
75 int get_word(Word_Info *data, int line, FILE *file_ptr);
76 void insert(char *inword, int in_line, Word_Node *table[]);
77 Word_Node *new_word_node(char *inword, int in_line);
78 Line_Node *add_existing(Line_Node *curr, int in_line);
79 void interact(Word_Node *table[]);
80 Arg_Node *place_args_in_list(char command[]);
81 Arg_Node *append(char *word, Arg_Node *head);
82 void count(Arg_Node *head, Word_Node *table[]);
83 void list_lines(Arg_Node *head, Word_Node *table[]);
84 void intersection(Arg_Node *head);
85 void intersect_array(int master[], int size, Arg_Node *arg_head);
86 void kill_arg_list(Arg_Node *head);
88 int main(int argc, char *argv[])
90 /* The actual hash table, a fixed-size array of pointers to word nodes */
91 Word_Node *table[TABLE_SIZE];
93 /* Checking command line input for one file name */
94 if (argc != ARGS_NUMBER + 1) {
95 fprintf(stderr, "%s requires %d argument\n", argv[0], ARGS_NUMBER);
96 exit(EXIT_FAILURE);
99 init_hash_table(argv[1], table);
100 interact(table);
102 /* Nb: I am not freeing the dynamic memory in the hash table, having been
103 ** told this is not necessary. */
104 return 0;
107 /* General dynamic allocation function that allocates and then checks. */
108 void *create(int mem_size)
110 void *dyn_block;
112 dyn_block = malloc(mem_size);
113 if (!(dyn_block)) {
114 fprintf(stderr, "Couldn't allocate enough memory to continue.\n");
115 exit(EXIT_FAILURE);
118 return dyn_block;
121 /* Function returns a hash value on a word. Almost identical to the hash
122 ** function presented in Sedgewick.
124 int hash(char *word)
126 int hash_value = 0;
128 for ( ; *word; word++)
129 hash_value = (HASH_CONSTANT * hash_value + *word) % TABLE_SIZE;
131 return hash_value;
134 /* Function builds the hash table from the given file. */
135 void init_hash_table(char *file_name, Word_Node *table[])
137 FILE *file_ptr;
138 Word_Info *data;
139 int line = 1, i;
141 /* Structure used when reading in words and line numbers. */
142 data = (Word_Info *) create(sizeof(Word_Info));
144 /* Initialise entire table to NULL. */
145 for (i = 0; i < TABLE_SIZE; i++)
146 table[i] = NULL;
148 /* Open file, check it. */
149 file_ptr = fopen(file_name, "r");
150 if (!(file_ptr)) {
151 fprintf(stderr, "Couldn't open '%s'.\n", file_name);
152 exit(EXIT_FAILURE);
155 /* 'Get' the words and lines one at a time from the file, and insert them
156 ** into the table one at a time. */
157 while ((line = get_word(data, line, file_ptr)) != EOF)
158 insert(data->word, data->line, table);
160 free(data);
161 fclose(file_ptr);
164 /* Function reads the next word, and it's line number, and places them in the
165 ** structure 'data', via a pointer.
167 int get_word(Word_Info *data, int line, FILE *file_ptr)
169 int index = 0, pos = BEFORE_WORD;
171 /* Only alphabetic characters are read, apostrophes are ignored, and other
172 ** characters are considered separators. 'pos' helps keep track whether
173 ** the current file position is inside a word or between words.
175 while ((data->word[index] = tolower(fgetc(file_ptr))) != EOF) {
176 if (data->word[index] == '\n')
177 line++;
178 if (islower(data->word[index])) {
179 if (pos == BEFORE_WORD) {
180 pos = IN_WORD;
181 data->line = line;
183 index++;
185 else if ((pos == IN_WORD) && (data->word[index] != '\'')) {
186 break;
189 /* Signals end of file has been reached. */
190 if (data->word[index] == EOF)
191 line = EOF;
193 /* Adding the null character. */
194 data->word[index] = '\0';
196 return line;
199 /* Function inserts a word and it's line number into the hash table. */
200 void insert(char *inword, int in_line, Word_Node *table[])
202 int position = hash(inword);
203 Word_Node *curr, *prev = NULL;
204 char dummy_word[DUMMY_WORD_LENGTH] = "A";
206 /* The case where that hash position hasn't been used before; a new word
207 ** node is created.
209 if (table[position] == NULL)
210 table[position] = new_word_node(dummy_word, 0);
211 curr = table[position];
213 /* Traverses that position's list of words until the current word is found
214 ** (i.e. it's come up before) or the list end is reached (i.e. it's the
215 ** first occurrence of the word).
217 while ((curr != NULL) && (strcmp(inword, curr->word) > 0)) {
218 prev = curr;
219 curr = curr->next_word;
222 /* If the word hasn't appeared before, it's inserted alphabetically into
223 ** the list.
225 if ((curr == NULL) || (strcmp(curr->word, inword) != 0)) {
226 prev->next_word = new_word_node(inword, in_line);
227 prev->next_word->next_word = curr;
229 /* Otherwise, the word count is incremented, and the line number is added
230 ** to the existing list.
232 else {
233 (curr->number)++;
234 curr->last_line = add_existing(curr->last_line, in_line);
238 /* Function creates a new node for when a word is inserted for the first time.
240 Word_Node *new_word_node(char *inword, int in_line)
242 Word_Node *new;
244 new = (Word_Node *) create(sizeof(Word_Node));
245 new->word = (char *) create(sizeof(char) * (strlen(inword) + 1));
246 new->word = strcpy(new->word, inword);
247 /* The word count is set to 1, as this is the first occurrence! */
248 new->number = 1;
249 new->next_word = NULL;
250 /* One line number node is added. */
251 new->line_list = (Line_Node *) create(sizeof(Line_Node));
252 new->line_list->line = in_line;
253 new->line_list->next_line = NULL;
254 new->last_line = new->line_list;
256 return new;
259 /* Function adds a line number to the line number list of a word that has
260 ** already been inserted at least once. The pointer 'last_line', part of
261 ** the word node structure, allows easy appending to the list.
263 Line_Node *add_existing(Line_Node *last_line, int in_line)
265 /* Check to see if that line has already occurred - multiple occurrences on
266 ** the one line are only recorded once. (Nb: They are counted twice, but
267 ** only listed once.)
269 if (last_line->line != in_line) {
270 last_line->next_line = (Line_Node *) create(sizeof(Line_Node));
271 last_line = last_line->next_line;
272 last_line->line = in_line;
273 last_line->next_line = NULL;
276 return last_line;
279 /* Function controls the interactive command line part of the program. */
280 void interact(Word_Node *table[])
282 char args[MAX_WORD_LENGTH]; /* Array to hold command line */
283 Arg_Node *arg_list = NULL; /* List that holds processed arguments */
284 int not_quitted = TRUE; /* Quit flag */
286 /* The prompt (?) is displayed. Commands are read into an array, and then
287 ** individual arguments are placed into a linked list for easy use.
288 ** The first argument (actually the command) is looked at to determine
289 ** what action should be performed. 'arg_list->next_arg' is passed to
290 ** count() and list_lines(), because the actual 'c' or 'l' is not needed
291 ** by them. Lastly, the argument linked list is freed, by 'kill_arg_list'.
293 do {
294 printf("?");
295 fgets(args, MAX_WORD_LENGTH - 1, stdin);
296 arg_list = place_args_in_list(args);
297 if (arg_list) {
298 if (strcmp(arg_list->word, "c") == 0)
299 count(arg_list->next_arg, table);
300 else if (strcmp(arg_list->word, "l") == 0)
301 list_lines(arg_list->next_arg, table);
302 else if (strcmp(arg_list->word, "q") == 0) {
303 printf("Quitting concord\n");
304 not_quitted = FALSE;
306 else
307 printf("Not a valid command.\n");
308 kill_arg_list(arg_list);
310 } while (not_quitted); /* Quits on flag */
313 /* Function takes an array containing a command line, and parses it, placing
314 ** actual word into a linked list.
316 Arg_Node *place_args_in_list(char command[])
318 int index1 = 0, index2 = 0, pos = BEFORE_WORD;
319 char token[MAX_WORD_LENGTH], c;
320 Arg_Node *head = NULL;
322 /* Non alphabetic characters are discarded. Alphabetic characters are
323 ** copied into the array 'token'. Once the current word has been copied
324 ** into 'token', 'append' is called, copying 'token' to a new node in the
325 ** linked list.
327 while (command[index1] != '\0') {
328 c = tolower(command[index1++]);
329 if (islower(c)) {
330 token[index2++] = c;
331 pos = IN_WORD;
333 else if (c == '\'')
334 token[index2] = c;
335 else if (pos == IN_WORD) {
336 pos = BEFORE_WORD;
337 token[index2] = '\0';
338 head = append(token, head);
339 index2 = 0;
343 return head;
346 /* Function takes a word, and appends a new node containing that word to the
347 ** list.
349 Arg_Node *append(char *word, Arg_Node *head)
351 Arg_Node *curr = head,
352 *new = (Arg_Node *) create(sizeof(Arg_Node));
354 new->word = (char *) create(sizeof(char) * (strlen(word) + 1));
355 strcpy(new->word, word);
356 new->line_list = NULL;
357 new->next_arg = NULL;
359 if (head == NULL)
360 return new;
362 while (curr->next_arg != NULL)
363 curr = curr->next_arg;
364 curr->next_arg = new;
366 return head;
370 /* Function displays the number of times a word has occurred. */
371 void count(Arg_Node *arg_list, Word_Node *table[])
373 int hash_pos = 0; /* Only initialised to avoid gnuc warnings */
374 Word_Node *curr_word = NULL;
376 /* Checking for the right number of arguments (one). */
377 if (arg_list) {
378 if (arg_list->next_arg != NULL) {
379 printf("c requires only one argument\n");
380 return;
382 hash_pos = hash(arg_list->word);
384 else
385 return;
387 /* Finds if the supplied word is in table, firstly by hashing to it's
388 ** would be position, and then traversing the list of words. If present,
389 ** it's number is displayed, otherwise '0' is printed.
391 if (table[hash_pos]) {
392 curr_word = table[hash_pos]->next_word;
393 while ((curr_word != NULL) &&
394 (strcmp(arg_list->word, curr_word->word) != 0))
395 curr_word = curr_word->next_word;
396 if (curr_word)
397 printf("%d\n", curr_word->number);
398 else
399 printf("0\n");
401 else
402 printf("0\n");
405 /* Function that takes each node in the argument list, and directs a pointer
406 ** to that word's list of lines, which are present in the hash table.
408 void list_lines(Arg_Node *arg_head, Word_Node *table[])
410 int hash_pos = 0; /* Only initialised to avoid gnuc warnings */
411 Word_Node *curr_word;
412 Arg_Node *curr_arg = arg_head;
414 /* For each word in the list of arguments, the word is looked for in the
415 ** hash table. Each argument node has a pointer, and if the word is there,
416 ** that pointer is set to point at that word's list of line numbers.
418 while (curr_arg != NULL) {
419 hash_pos = hash(curr_arg->word);
420 if (table[hash_pos]) {
421 curr_word = table[hash_pos]->next_word; /* Gets past dummy node */
422 while (curr_word != NULL &&
423 strcmp(curr_arg->word, curr_word->word) != 0)
424 curr_word = curr_word->next_word;
425 if (curr_word)
426 curr_arg->line_list = curr_word->line_list;
428 curr_arg = curr_arg->next_arg;
430 /* An intersection is then performed, to determine which lines, if any,
431 ** all the arguments appear on.
433 if (arg_head)
434 intersection(arg_head);
437 /* Function takes a list of line lists, and finds the lines that are common
438 ** to each line list, by using a comparison array.
440 void intersection(Arg_Node *arg_head)
442 Line_Node *curr_line;
443 int *master, n = 0, index = 0, output = FALSE;
445 /* Find size of first list, for creating master array */
446 curr_line = arg_head->line_list;
447 while (curr_line) {
448 n++;
449 curr_line = curr_line->next_line;
452 /* The master comparison array is created. */
453 master = (int *) create(sizeof(int) * n);
454 curr_line = arg_head->line_list;
456 /* Copy first list into master array */
457 while (curr_line) {
458 *(master + index++) = curr_line->line;
459 curr_line = curr_line->next_line;
462 /* Perform the actual intersection. */
463 intersect_array(master, n, arg_head->next_arg);
465 /* Print the line numbers left in the processed array, those left contain
466 ** all the words specified in the command.
468 for (index = 0; index < n; index++)
469 if (*(master + index) != 0) {
470 printf("%d ", *(master + index));
471 output = TRUE;
473 /* 'Output' merely prevents an unnecessary newline when 'l' returns no
474 ** answer.
476 if (output)
477 printf("\n");
479 /* Deallocate dynamic memory for master array */
480 free(master);
483 /* Function takes master array containing line numbers - these depend on the
484 ** first list of lines, and is done in 'list_lines'. It then moves through the
485 ** argument list. For each word, each line number in master is compared to each
486 ** line number in that word's line list. If there is no match, then that
487 ** position in the array is set to 0, because that line is no longer in
488 ** contention as an answer.
490 void intersect_array(int master[], int size, Arg_Node *arg_head)
492 int index = 0;
493 Line_Node *curr_line;
495 while (arg_head) {
496 index = 0;
497 curr_line = arg_head->line_list;
498 /* For each line in the list, any number less than that in the array will
499 ** be set to zero. Any number equal to that in the list will remain.
500 ** This loop depends on the fact that both the line list, and the master
501 ** array, are sorted. */
502 while (curr_line) {
503 while (*(master + index) < curr_line->line && index < size)
504 *(master + index++) = 0;
505 while (*(master + index) <= curr_line->line && index < size)
506 index++;
507 curr_line = curr_line->next_line;
509 /* Once the list of lines has been traversed, any array positions that
510 ** haven't been examined are set to zero, as they are no longer in
511 ** contention.
513 for ( ; index < size; index++)
514 *(master + index) = 0;
516 arg_head = arg_head->next_arg;
520 /* Function to free dynamic memory used by the arguments linked list. */
521 void kill_arg_list(Arg_Node *head)
523 Arg_Node *temp;
525 while (head != NULL) {
526 temp = head;
527 head = head->next_arg;
528 free(temp->word);
529 free(temp);