Page 143

Write a program that prints the distinct words in its input sorted into decreasing order of frequency of occurrence. Precede each word by its count.

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

#include <string.h>

#define MAX_LEN 1000

struct node {
	char *word;
	int count;
	struct node *left;
	struct node *right;
};

int getword(char *word, int lim);
struct node *add_node_by_word(struct node *node, char *word);
void print_tree(struct node *root);
struct node *sort_tree_by_count(struct node *count_tree,
				struct node *word_tree);

int main()
{
	char str[MAX_LEN];
	struct node *word_tree = NULL;

	int c;
	while ((c = getword(str, MAX_LEN)) != EOF) {
		if (isalpha(str[0])) {
			word_tree = add_node_by_word(word_tree, str);
		}
	}

	struct node *count_tree = sort_tree_by_count(NULL, word_tree);

	print_tree(count_tree);
}

struct node *alloc_node();
struct node *add_node_by_word(struct node *node, char *word)
{
	int comp_res;
	if (node == NULL) {
		node = alloc_node();
		node->word = strdup(word);
		node->count = 1;
		node->left = NULL;
		node->right = NULL;
	} else if ((comp_res = strcmp(word, node->word)) == 0) {
		node->count++;
	} else if (comp_res < 0) {
		node->left = add_node_by_word(node->left, word);
	} else { // comp_res > 0
		node->right = add_node_by_word(node->right, word);
	}

	return node;
}

struct node *add_node_by_count(struct node *node, struct node *new);
struct node *sort_tree_by_count(struct node *count_tree, struct node *word_tree)
{
	if (word_tree != NULL) {
		count_tree = sort_tree_by_count(count_tree, word_tree->left);

		count_tree = add_node_by_count(count_tree, word_tree);

		count_tree = sort_tree_by_count(count_tree, word_tree->right);
	}
	return count_tree;
}

struct node *add_node_by_count(struct node *node, struct node *new)
{
	if (node == NULL) {
		node = alloc_node();
		node->word = new->word;
		node->count = new->count;
		node->left = NULL;
		node->right = NULL;
	} else if (new->count >= node->count) {
		node->left = add_node_by_count(node->left, new);
	} else { // new->count < node->count
		node->right = add_node_by_count(node->right, new);
	}
	return node;
}

struct node *alloc_node()
{
	return (struct node *)malloc(sizeof(struct node));
}

void print_tree(struct node *node)
{
	if (node != NULL) {
		print_tree(node->left);

		printf("%d %s\n", node->count, node->word);

		print_tree(node->right);
	}
}
int getch();
void ungetch(int c);

int getword(char *word, int lim)
{
	int c, getch(void);
	void ungetch(int);
	char *w = word;

	while (isspace(c = getch()))
		;

	if (c != EOF) {
		*w++ = c;
	}

	if (!isalpha(c) && c != '_') {
		*w = '\0';
		return c;
	}

	for (; --lim > 0; w++) {
		if (!isalnum(*w = getch()) && *w != '_') {
			ungetch(*w);
			break;
		}
	}
	*w = '\0';

	return word[0];
}

#define BUFSIZE 100
char buf[BUFSIZE];
int bufp = 0;

int getch(void)
{
	return (bufp > 0) ? buf[--bufp] : getchar();
}

void ungetch(int c)
{
	if (bufp >= BUFSIZE) {
		printf("ungetch: too many characters\n");
	} else {
		buf[bufp++] = c;
	}
}