/* Page 136
 * Our version of getword does not properly handle underscores, string constants,
 * comments, or preprocessor control lines. Write a better version. */
/* What exactly is 'proper' handling here? In this particular context, none of those
* things are useful. I assume I am meant to be modifying `getword` to be applicable in
* a broader context. My interpretation is that underscores should be allowed in words,
* and that words should be ignored if in comments, strings, or preprocessor control
* lines. */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
/* Size, in bytes, of the word buffer that main hands to getword. */
#define MAXWORD 100
/* Number of entries in keytab, derived from the array itself so it always
 * matches the initializer list. */
#define NKEYS (sizeof keytab / sizeof keytab[0])

/* A keyword together with the number of times it has been seen. */
struct key {
    char *word;
    int count;
} keytab[] = {
    /* Entries must stay in ascending strcmp order: binsearch relies on it. */
    { "auto", 0 },     { "break", 0 },    { "case", 0 },
    { "char", 0 },     { "const", 0 },    { "continue", 0 },
    { "default", 0 },  { "do", 0 },       { "double", 0 },
    { "else", 0 },     { "enum", 0 },     { "extern", 0 },
    { "float", 0 },    { "for", 0 },      { "goto", 0 },
    { "if", 0 },       { "int", 0 },      { "long", 0 },
    { "register", 0 }, { "return", 0 },   { "short", 0 },
    { "signed", 0 },   { "sizeof", 0 },   { "static", 0 },
    { "struct", 0 },   { "switch", 0 },   { "typedef", 0 },
    { "union", 0 },    { "unsigned", 0 }, { "void", 0 },
    { "volatile", 0 }, { "while", 0 }
};
/* Forward declarations for the helpers that main uses; both are defined
 * further down in this file. */

/* Reads the next token from input into word (lim is the size limit passed
 * for word); returns the token's first character, or EOF. */
int getword(char *word, int lim);

/* Binary search for word in tab[0..n-1]; returns the matching index or -1. */
int binsearch(char *word, struct key *tab, int n);
/*
 * Count occurrences of the keywords listed in keytab.
 *
 * Repeatedly calls getword until it returns EOF; every token that starts
 * with a letter is looked up in keytab with binsearch and, when found, its
 * counter is incremented. Afterwards each keyword with a non-zero count is
 * printed as "count word", one per line. Always returns 0.
 */
int main(void)
{
    int n;
    char word[MAXWORD];

    while (getword(word, MAXWORD) != EOF) {
        /* <ctype.h> functions need a value representable as unsigned char
         * (or EOF); a plain char may be negative, so convert first. */
        if (isalpha((unsigned char)word[0])) {
            if ((n = binsearch(word, keytab, (int)NKEYS)) >= 0) {
                keytab[n].count++;
            }
        }
    }

    /* NKEYS has type size_t; compare as int to avoid a mixed-sign comparison. */
    for (n = 0; n < (int)NKEYS; n++) {
        if (keytab[n].count > 0) {
            printf("%4d %s\n", keytab[n].count, keytab[n].word);
        }
    }
    return 0;
}
/*
 * Binary search for word among tab[0] .. tab[n-1].
 *
 * tab must be sorted in ascending strcmp order of its word members.
 * Returns the index of the matching entry, or -1 when word is not present
 * (including when n <= 0).
 */
int binsearch(char *word, struct key tab[], int n)
{
    int low = 0;
    int high = n - 1;

    while (low <= high) {
        /* Written as low + (high - low) / 2 because the textbook form
         * (low + high) / 2 overflows int once low + high exceeds INT_MAX. */
        int mid = low + (high - low) / 2;
        int cond = strcmp(word, tab[mid].word);

        if (cond < 0) {
            high = mid - 1;
        } else if (cond > 0) {
            low = mid + 1;
        } else {
            return mid;
        }
    }
    return -1;
}
int getch(void);
void ungetch(int);

/* Discard input through the end of a block comment whose opening delimiter
 * has already been read. The closing star and slash must be adjacent.
 * Stops early at EOF. */
static void skip_block_comment(void)
{
    int prev = '\0';
    int c;

    while ((c = getch()) != EOF) {
        if (prev == '*' && c == '/') {
            return;
        }
        prev = c;
    }
}

/* Discard the rest of the current logical line (used for // comments and
 * preprocessor lines). A backslash escapes the following character, so a
 * backslash-newline pair continues the line. Stops early at EOF. */
static void skip_line(void)
{
    int c;

    while ((c = getch()) != EOF && c != '\n') {
        if (c == '\\' && getch() == EOF) {
            return;
        }
    }
}

/* Discard a string or character constant whose opening delimiter `quote`
 * has already been read. A backslash escapes the following character, so an
 * escaped quote does not end the constant. An unescaped newline or EOF ends
 * the scan, which limits the damage of an unterminated constant. */
static void skip_quoted(int quote)
{
    int c;

    while ((c = getch()) != EOF && c != quote && c != '\n') {
        if (c == '\\' && getch() == EOF) {
            return;
        }
    }
}

/*
 * Read the next token from input (via getch) into word.
 *
 * White space, block and line comments, string and character constants, and
 * everything from a '#' to the end of its (possibly continued) line are
 * skipped and never reported. A token is either
 *   - a word: a letter or underscore followed by letters, digits and
 *     underscores, or
 *   - any other single character.
 *
 * word - destination buffer; always NUL-terminated when lim > 0.
 * lim  - size of word in bytes. At most lim - 1 characters are stored; a
 *        longer word is consumed in full but stored truncated, so its tail
 *        is not reported as a separate word.
 *
 * Returns the first character of the token, or EOF at end of input (word is
 * then the empty string).
 */
int getword(char *word, int lim)
{
    char *w = word;
    int room = (lim > 0) ? lim - 1 : 0; /* characters that fit before the NUL */
    int c;
    int is_word;

    /* Find the first character that starts a real token. */
    for (;;) {
        c = getch();
        if (c == EOF) {
            break;
        }
        if (isspace((unsigned char)c)) {
            continue;
        }
        if (c == '"' || c == '\'') {
            skip_quoted(c);
            continue;
        }
        if (c == '#') {
            skip_line();
            continue;
        }
        if (c == '/') {
            int next = getch();

            if (next == '*') {
                skip_block_comment();
                continue;
            }
            if (next == '/') {
                skip_line();
                continue;
            }
            /* A lone slash is an ordinary token; give back the lookahead.
             * EOF is not pushed back: the next read reports it again. */
            if (next != EOF) {
                ungetch(next);
            }
        }
        break;
    }

    if (c != EOF && room > 0) {
        *w++ = (char)c;
        room--;
    }

    /* ctype functions need an unsigned char value, hence the casts. */
    is_word = (c != EOF) && (isalpha((unsigned char)c) || c == '_');
    if (is_word) {
        int next;

        /* Read into an int so EOF stays distinguishable from any character. */
        while ((next = getch()) != EOF
               && (isalnum((unsigned char)next) || next == '_')) {
            if (room > 0) {
                *w++ = (char)next;
                room--;
            }
        }
        if (next != EOF) {
            ungetch(next);
        }
    }

    if (lim > 0) {
        *w = '\0';
    }
    return c;
}
/* Maximum number of characters that can be pushed back at once. */
#define BUFSIZE 100

/* Push-back stack shared by getch and ungetch. The elements are int rather
 * than char so that every value getchar() can return, including EOF and
 * bytes >= 0x80, is stored and handed back unchanged whatever the
 * signedness of plain char. */
int buf[BUFSIZE];
int bufp = 0; /* index of the next free slot in buf */

/* Return the most recently pushed-back value if there is one; otherwise
 * read the next character from standard input with getchar(). */
int getch(void)
{
    return (bufp > 0) ? buf[--bufp] : getchar();
}

/* Push c back so that the next getch() returns it. When the stack is full
 * the value is dropped and a message is printed. */
void ungetch(int c)
{
    if (bufp >= BUFSIZE) {
        printf("ungetch: too many characters\n");
    } else {
        buf[bufp++] = c;
    }
}