Page 143

Write a program that reads a C program and prints in alphabetical order each group of variable names that are identical in the first 6 characters but different somewhere thereafter. Don't count words within strings and comments. Make 6 a parameter that can be set from the command line.

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_LEN 1000
#define DEFAULT_MATCH_LEN 6

struct nnode {
	char *name;
	int count;
	struct nnode *left;
	struct nnode *right;
};

struct pnode {
	char *pattern;
	struct nnode *name_tree;
	struct pnode *left;
	struct pnode *right;
};

int get_next_name(char storage[], int max_len);
struct pnode *add_pnode(struct pnode *root, char *name, int match_len);
void print_ptree(struct pnode *root);

int main(int argc, char *argv[])
{
	/* Read parameter from command line */
	int match_len = DEFAULT_MATCH_LEN;
	for (int i = 0; i < argc; i++) {
		if (i + 1 < argc && strcmp(argv[i], "-n") == 0) {
			int num_provided = atoi(argv[i + 1]);
			if (!num_provided) {
				printf("error: non-zero integer must be passed to -n\n");
			} else {
				match_len = num_provided;
			}
		}
	}

	/* Get variable names */
	char word[MAX_LEN];
	struct pnode *root = NULL;
	while (get_next_name(word, MAX_LEN) != EOF) {
		/* Put into tree */
		if (isalpha(word[0])) {
			root = add_pnode(root, word, match_len);
		}
	}

	/* Print tree */
	print_ptree(root);
	return 0;
}

struct pnode *alloc_pnode(void);
struct nnode *add_nnode(struct nnode *root, char *name);

struct pnode *add_pnode(struct pnode *node, char *name, int match_len)
{
	int comp_res;

	if (node == NULL) {
		node = alloc_pnode();
		node->pattern = strndup(name, match_len);
		node->name_tree = add_nnode(NULL, name);
		node->left = NULL;
		node->right = NULL;
	} else if ((comp_res = strncmp(name, node->pattern, match_len)) == 0) {
		add_nnode(node->name_tree, name);
	} else if (comp_res < 0) {
		node->left = add_pnode(node->left, name, match_len);
	} else { // comp_res > 0
		node->right = add_pnode(node->right, name, match_len);
	}

	return node;
}

struct pnode *alloc_pnode(void)
{
	return (struct pnode *)malloc(sizeof(struct pnode));
}

struct nnode *alloc_nnode(void);

struct nnode *add_nnode(struct nnode *node, char *name)
{
	int comp_res;

	if (node == NULL) {
		node = alloc_nnode();
		node->name = strdup(name);
		node->count = 1;
		node->left = NULL;
		node->right = NULL;
	} else if ((comp_res = strcmp(name, node->name)) == 0) {
		node->count++;
	} else if (comp_res < 0) {
		node->left = add_nnode(node->left, name);
	} else { // comp_res > 0
		node->right = add_nnode(node->right, name);
	}

	return node;
}

struct nnode *alloc_nnode(void)
{
	return (struct nnode *)malloc(sizeof(struct nnode));
}

void print_ntree(struct nnode *root);

void print_ptree(struct pnode *node)
{
	if (node != NULL) {
		print_ptree(node->left);
		printf("Pattern '%s':\n", node->pattern);
		print_ntree(node->name_tree);
		putchar('\n');
		print_ptree(node->right);
	}
}

void print_ntree(struct nnode *node)
{
	if (node != NULL) {
		print_ntree(node->left);
		printf("%4d %s\n", node->count, node->name);
		print_ntree(node->right);
	}
}

int getch();
void ungetch(int c);

int get_next_name(char *out, int lim)
{
	int c, getch(void);
	void ungetch(int);
	char *w = out;

	while (isspace(c = getch()))
		;

	/* Skip comments, strings, preprocessor statements */
	char *term = "";
	if (c == '/') {
		c = getch();
		if (c == '/') {
			term = "\n";
		} else if (c == '*') {
			term = "*/";
		} else {
			ungetch(c);
			c = '/';
		}
	} else if (c == '#') {
		term = "\n";
	} else if (c == '"') {
		term = "\"";
	}
	for (int i = 0; term[i] != '\0' && (c = getch()) != EOF;) {
		if (c == term[i]) {
			i++;
		}
	}

	if (c != EOF) {
		*w++ = c;
	}

	if (!(isalpha(c) || c == '_')) {
		*w = '\0';
		return c;
	}

	for (; --lim > 0; w++) {
		if (!(isalnum(*w = getch()) || *w == '_')) {
			ungetch(*w);
			break;
		}
	}
	*w = '\0';

	return out[0];
}

#define BUFSIZE 100
char buf[BUFSIZE];
int bufp = 0;

int getch(void)
{
	return (bufp > 0) ? buf[--bufp] : getchar();
}

void ungetch(int c)
{
	if (bufp >= BUFSIZE) {
		printf("ungetch: too many characters\n");
	} else {
		buf[bufp++] = c;
	}
}