/*  Extract comments from a C program source file.
**
**  This program acts as a filter to copy comments in a C source
**  file to the output.  Each comment includes the starting and
**  ending delimiters and is followed by a newline.
**
**  Three #ifdef options are defined:
**    INHIBIT_TRIGRAPHS prevents recognition of trigraphs, which
**      can affect detection of escaped characters,
**      e.g., ??/" is an escaped quote.
**    TRANSLATE_TRIGRAPHS causes the output to have trigraphs
**      converted to the normal C characters.
**    CPP_MODE causes "//" to start a comment.
**  The default for these symbols is undefined, resulting in
**  operation on strict ANSI source, except as noted below.
**
**  What makes this program interesting is that comment detection
**  should be inhibited within strings and character constants.
**
**  Note: The name of a header following #include can, under ANSI,
**  contain any sequence of characters, except \n and the closing
**  > or ".  This program doesn't inhibit comment, string, or character
**  constant detection within the header name, as an ANSI parser must.
**
**  Written by and contributed to the public domain by
**  Thad Smith III,   Boulder, CO,   October 1990.
*/

#include <stdio.h>

#ifndef INHIBIT_TRIGRAPHS       /* default: recognize trigraphs */

#define getnc()   getnsc(1)     /* get char with trigraph xlate */

#ifdef TRANSLATE_TRIGRAPHS
#define getcmtc() getnsc(1)     /* get comment char w/ t.g. xlate */
#else
#define getcmtc() getnsc(0)     /* default: no comment t.g. xlate */
#endif

/*
**  Get next source character or EOF.
**
**  Input is read from stdin through a three-character window held in
**  static storage, so that a complete trigraph can be recognized before
**  its first character is handed out.  The window starts out as zeros;
**  the first two calls therefore return 0 and only fill the window, which
**  is why main() calls this twice before it starts scanning.
**
**  cvtg  non-zero: a trigraph at the front of the window is replaced by
**                  the single character it stands for;
**        zero:     characters are returned exactly as read.
**
**  The window is shared by both modes, so calls with different cvtg
**  values may be freely interleaved.
**
**  Returns the next (possibly translated) character, or EOF.
*/
int getnsc(int cvtg)            /* boolean: convert trigraphs */
{
    static int c, nc, nnc;      /* next 3 characters */

    /* shift in next source character */
    c = nc;
    nc = nnc;
    nnc = getchar();

    /* perform trigraph substitution */
    if (cvtg && c == '?' && nc == '?')
    {
        switch (nnc)
        {
        case '=' : c = '#' ; break;
        case '(' : c = '[' ; break;
        case '/' : c = '\\'; break;
        case ')' : c = ']' ; break;
        case '\'': c = '^' ; break;
        case '<' : c = '{' ; break;
        case '!' : c = '|' ; break;
        case '>' : c = '}' ; break;
        case '-' : c = '~' ; break;
        default  : return c;    /* no substitution */
        }

        /* all three trigraph characters are used up: refill the window */
        nc = getchar();
        nnc = getchar();
    }
    return c;
}

#else                           /* don't process trigraphs */

#define getnc()   getchar()
#define getcmtc() getchar()

#endif

/*
**  Filter stdin to stdout, copying only the comments.
**
**  Outside of comments the input is scanned for a comment opener or for
**  a string / character constant.  Constants are skipped without output
**  so that comment delimiters inside them are not recognized.  Each
**  block comment is written with its delimiters and followed by a
**  newline.
**
**  Returns 0 when end of input is reached.
*/
int main(void)
{
    int pc;                     /* previous character */
    int c;                      /* current input character */

#ifndef INHIBIT_TRIGRAPHS
    getnc();                    /* prime the pump */
    getnc();
#endif
    c = getnc();                /* get first char */

    for (;;)                    /* in non-comment area */
    {
        switch (c)
        {
        case '/':               /* possible start of comment */
            if ((c = getnc()) == '*')   /* process comment */
            {
                putchar('/');
                putchar('*');

                /* copy comment to stdout; pc starts at 0 so that the
                ** star of the opening delimiter cannot also serve as
                ** the star of the closing one */
                for (pc = 0;
                     (c = getcmtc()) != EOF &&
                     (putchar(c) != '/' || pc != '*');
                     pc = c)
                    ;
                putchar('\n');
#ifdef CPP_MODE
            }
            else if (c == '/')  /* '//' comment */
            {
                putchar('/');
                putchar('/');

                /* the terminating newline is copied as part of the
                ** comment, so no extra one is written */
                while ((c = getcmtc()) != EOF && putchar(c) != '\n')
                    ;
#endif
            }
            else
                continue;       /* test current char */
            break;

        case '\"':              /* start of string */
        case '\'':              /* start of (possibly multi-byte)
                                ** char constant */
            pc = c;             /* save delimiter */
            do                  /* scan through character constant,
                                ** discarding escape chars */
            {
                while ((c = getnc()) == '\\')
                    getnc();    /* drop the escaped character */
            } while (c != pc && c != EOF);
            break;

        default:                /* ordinary character: nothing to do */
            break;
        }

        if (c == EOF)
            return 0;
        else
            c = getnc();
    }
}