Page 143
Write a cross-referencer that prints a list of all words in a document, and, for each word, a list of the line numbers on which it occurs. Remove noise words like "the", "and," and so on.
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#define MAX_LEN 1000
struct ll_node {
int line_num;
struct ll_node *next;
};
struct tree_node {
char *word;
struct ll_node *line_ll;
struct tree_node *left;
struct tree_node *right;
};
int getword(char *word, int lim);
struct tree_node *add_node(struct tree_node *node, char *word, int line_num);
void print_tree(struct tree_node *root);
int main()
{
char str[MAX_LEN];
struct tree_node *root = NULL;
int c, line_num = 1;
while ((c = getword(str, MAX_LEN)) != EOF) {
if (c == '\n') {
line_num++;
} else if (isalpha(str[0])) {
root = add_node(root, str, line_num);
}
}
print_tree(root);
}
struct tree_node *alloc_tree_node();
struct ll_node *alloc_ll_node();
struct tree_node *add_node(struct tree_node *node, char *word, int line_num)
{
int comp_res;
if (node == NULL) {
node = alloc_tree_node();
node->word = strdup(word);
node->line_ll = alloc_ll_node();
node->line_ll->line_num = line_num;
node->left = NULL;
node->right = NULL;
} else if ((comp_res = strcmp(word, node->word)) == 0) {
struct ll_node *ll_node = alloc_ll_node();
ll_node->line_num = line_num;
ll_node->next = node->line_ll;
node->line_ll = ll_node;
} else if (comp_res < 0) {
node->left = add_node(node->left, word, line_num);
} else { // comp_res > 0
node->right = add_node(node->right, word, line_num);
}
return node;
}
struct tree_node *alloc_tree_node()
{
return (struct tree_node *)malloc(sizeof(struct tree_node));
}
struct ll_node *alloc_ll_node()
{
return (struct ll_node *)malloc(sizeof(struct ll_node));
}
void print_tree(struct tree_node *node)
{
if (node != NULL) {
print_tree(node->left);
printf("%s: ", node->word);
struct ll_node *ll_node = node->line_ll;
do {
printf("%d ", ll_node->line_num);
} while ((ll_node = ll_node->next) != NULL);
putchar('\n');
print_tree(node->right);
}
}
int getch();
void ungetch(int c);
int getword(char *word, int lim)
{
int c, getch(void);
void ungetch(int);
char *w = word;
if ((c = getch()) == '\n') {
return '\n';
}
while (isspace(c))
c = getch();
if (c != EOF) {
*w++ = c;
}
if (!isalpha(c)) {
*w = '\0';
return c;
}
for (; --lim > 0; w++) {
if (!isalnum(*w = getch())) {
ungetch(*w);
break;
}
}
*w = '\0';
return word[0];
}
#define BUFSIZE 100
char buf[BUFSIZE];
int bufp = 0;
int getch(void)
{
return (bufp > 0) ? buf[--bufp] : getchar();
}
void ungetch(int c)
{
if (bufp >= BUFSIZE) {
printf("ungetch: too many characters\n");
} else {
buf[bufp++] = c;
}
}