/*
 * c2html.c
 *
 * Converts c and c++ code into HTML for publishing on the WWW
 * Copyright (C) 1996-1999 Christopher Kohlhoff (chris@kohlhoff.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#if defined(HAVE_DIR_H)
#include <dir.h>
#endif

/*********************************************************************
 * Structures and Definitions
 */

#define MAXIDENT 512
#define MAXLINE 512
#define KEYCHUNK 16
#define VALUECHUNK 32

/* StringArray structure for a sorted list of keywords */
typedef struct StringArray
{
  char **strings;
  int num;
} StringArray;

/* Highlighting scheme to be used */
typedef struct Scheme
{
  int tabwidth;
  char *keywords;
  char *space;
  char *newline;
  char *specialchar;
  char *code_b;
  char *code_e;
  char *comment_b;
  char *comment_e;
  char *directive_b;
  char *directive_e;
  char *constant_b;
  char *constant_e;
  char *keyword_b;
  char *keyword_e;
} Scheme;

/* Output file and related context */
typedef struct OutputFile
{
  Scheme scheme;
  FILE *file;
  int column;
} OutputFile;

/*********************************************************************
 * Function declarations
 */

int ConvertCodeToHtml(const char *filename, OutputFile *output);
void OutputChar(char c, OutputFile *output);
void OutputString(const char *str, OutputFile *output);
void OutputFormat(const char *str, OutputFile *output);

/* StringArray functions */
int StringArrayRead(StringArray *array, const char *filename);
char *StringArrayFind(StringArray *array, const char *match);
int StringArrayCmp(const void *a, const void *b);
FILE *FOpenOnPath(const char *fname, const char *mode);

/* Scheme functions */
int SchemeRead(Scheme * scheme, const char *filename);
char *SchemeReadKey(FILE *fp);
char *SchemeReadValue(FILE *fp);

/*********************************************************************
 * Program entry point
 *
 * Usage: c2html <filename> [<scheme file>]
 *
 * Loads the highlighting scheme (argv[2], or "c2html.scheme" when no
 * second argument is given), then writes the highlighted form of
 * argv[1] to stdout. Returns EXIT_FAILURE when the arguments are
 * missing, the scheme cannot be read, or the conversion fails.
 */

int
main(int argc, char *argv[])
{
  OutputFile output;

  if (argc < 2)
  {
    fprintf(stderr, "Usage: c2html <filename> [<scheme file>]\n");
    return EXIT_FAILURE;
  }

  /* Initialise the output file. SchemeRead reports its own error; we
     must stop here because a failed read may leave the scheme unset. */
  if (!SchemeRead(&output.scheme, argc > 2 ? argv[2] : "c2html.scheme"))
    return EXIT_FAILURE;
  output.file = stdout;
  output.column = 0;

  if (!ConvertCodeToHtml(argv[1], &output))
    return EXIT_FAILURE;

  return EXIT_SUCCESS;
}

/*********************************************************************
 * Machine States
 */

typedef enum State
{
  NORMAL,
  NORMAL_LINE_START,
  HALF_COMMENT,
  C_COMMENT,
  END_C_COMMENT,
  CPP_COMMENT,
  DIRECTIVE_HALF_COMMENT,
  DIRECTIVE_C_COMMENT,
  DIRECTIVE_END_C_COMMENT,
  SINGLE_STRING,
  SINGLE_STRING_ESC,
  DOUBLE_STRING,
  DOUBLE_STRING_ESC,
  DIRECTIVE,
  DIRECTIVE_LINE_WRAP,
  NUMBER_BEFORE_DOT,
  NUMBER_AFTER_DOT,
  IDENTIFIER
} State;

/*********************************************************************
 * ConvertCodeToHtml
 *
 * A State Machine that turns C or C++ code into HTML
 *
 * filename - the source file to read
 * output   - destination stream, highlighting scheme and column count
 *
 * Returns 1 on success. Returns 0 when the source file cannot be
 * opened or the keyword file named by the scheme cannot be read (the
 * latter is also reported on stderr).
 */

int
ConvertCodeToHtml(const char *filename, OutputFile *output)
{
  int c;                        /* the character just read */
  State state;                  /* the current state of the machine */
  int write;                    /* should we write the character just read */
  char ident[MAXIDENT + 1];     /* the current identifier being read */
  int nident;                   /* the position in the current identifier */
  StringArray keywords;         /* the array of keywords */
  int finished;                 /* whether we have finished processing the input */
  int i;                        /* index used when releasing the keywords */
  FILE *fp;                     /* the input file */

  /* open the input FILE */
  fp = fopen(filename, "rt");
  if (fp == NULL)
    return 0;

  /* read in the keywords */
  keywords.strings = NULL;
  keywords.num = 0;
  if (!StringArrayRead(&keywords, output->scheme.keywords))
  {
    fprintf(stderr, "Unable to read keywords file %s\n", output->scheme.keywords);
    /* do not leak the array or the input file on the error path */
    free(keywords.strings);
    fclose(fp);
    return 0;
  }

  OutputFormat(output->scheme.code_b, output);

  /* state-machine to output the code */
  state = NORMAL_LINE_START;
  finished = 0;
  nident = 0;
  ident[0] = '\0';
  while (!finished)
  {
    c = fgetc(fp);

    /* treat EOF as '\n' to ensure any current highlighting is ended */
    if (c == EOF)
    {
      c = '\n';
      finished = 1;
    }

    write = 1;
    switch (state)
    {
    case NORMAL_LINE_START:
      if (c == '#')
      {
        OutputFormat(output->scheme.directive_b, output);
        state = DIRECTIVE;
        break;
      }
      else if (!isspace(c))
        state = NORMAL;
      /* fall through */

    case NORMAL:
    classify_normal:
      /* Also reached by goto from states that end on a character they
         do not own, so that character is classified like any other. */
      if (c == '\n')
        state = NORMAL_LINE_START;
      else if (c == '/')
        state = HALF_COMMENT, write = 0;
      else if (isdigit(c))
      {
        OutputFormat(output->scheme.constant_b, output);
        state = NUMBER_BEFORE_DOT;
      }
      else if (isalpha(c) || c == '_')
      {
        nident = 0;
        ident[nident++] = c;
        ident[nident] = '\0';
        state = IDENTIFIER;
        write = 0;
      }
      else if (c == '\'')
      {
        OutputFormat(output->scheme.constant_b, output);
        state = SINGLE_STRING;
      }
      else if (c == '\"')
      {
        OutputFormat(output->scheme.constant_b, output);
        state = DOUBLE_STRING;
      }
      break;

    case HALF_COMMENT:
      if (c == '/')
      {
        OutputFormat(output->scheme.comment_b, output);
        OutputChar('/', output);
        state = CPP_COMMENT;
      }
      else if (c == '*')
      {
        OutputFormat(output->scheme.comment_b, output);
        OutputChar('/', output);
        state = C_COMMENT;
      }
      else
      {
        /* The held-back slash was a plain operator (e.g. division):
           emit it and classify the current character normally. */
        OutputChar('/', output);
        state = NORMAL;
        goto classify_normal;
      }
      break;

    case C_COMMENT:
      if (c == '*')
        state = END_C_COMMENT;
      break;

    case END_C_COMMENT:
      if (c == '/')
      {
        OutputChar(c, output);
        OutputFormat(output->scheme.comment_e, output);
        state = NORMAL, write = 0;
      }
      else if (c != '*')
        state = C_COMMENT;
      break;

    case CPP_COMMENT:
      if (c == '\n')
      {
        OutputFormat(output->scheme.comment_e, output);
        state = NORMAL_LINE_START;
      }
      break;

    case DIRECTIVE_HALF_COMMENT:
      if (c == '/')
      {
        OutputFormat(output->scheme.directive_e, output);
        OutputFormat(output->scheme.comment_b, output);
        state = CPP_COMMENT;
      }
      else if (c == '*')
      {
        OutputFormat(output->scheme.directive_e, output);
        OutputFormat(output->scheme.comment_b, output);
        state = DIRECTIVE_C_COMMENT;
      }
      else
        state = DIRECTIVE;
      /* the slash held back by DIRECTIVE is emitted in every case */
      OutputChar('/', output);
      break;

    case DIRECTIVE_C_COMMENT:
      if (c == '*')
        state = DIRECTIVE_END_C_COMMENT;
      break;

    case DIRECTIVE_END_C_COMMENT:
      if (c == '/')
      {
        OutputChar(c, output);
        OutputFormat(output->scheme.comment_e, output);
        OutputFormat(output->scheme.directive_b, output);
        state = DIRECTIVE, write = 0;
      }
      else if (c != '*')
        state = DIRECTIVE_C_COMMENT;
      break;

    case SINGLE_STRING:
      if (c == '\'')
      {
        OutputChar(c, output);
        OutputFormat(output->scheme.constant_e, output);
        state = NORMAL;
        write = 0;
      }
      else if (c == '\\')
        state = SINGLE_STRING_ESC;
      break;

    case SINGLE_STRING_ESC:
      state = SINGLE_STRING;
      break;

    case DOUBLE_STRING:
      if (c == '\"')
      {
        OutputChar(c, output);
        OutputFormat(output->scheme.constant_e, output);
        state = NORMAL;
        write = 0;
      }
      else if (c == '\\')
        state = DOUBLE_STRING_ESC;
      break;

    case DOUBLE_STRING_ESC:
      state = DOUBLE_STRING;
      break;

    case DIRECTIVE:
      if (c == '\n')
      {
        OutputFormat(output->scheme.directive_e, output);
        state = NORMAL_LINE_START;
      }
      else if (c == '/')
      {
        state = DIRECTIVE_HALF_COMMENT;
        write = 0;
      }
      else if (c == '\\')
        state = DIRECTIVE_LINE_WRAP;
      break;

    case DIRECTIVE_LINE_WRAP:
      if (c == '\n' || !isspace(c))
        state = DIRECTIVE;
      break;

    case NUMBER_BEFORE_DOT:
      if (c == '.')
        state = NUMBER_AFTER_DOT;
      else if (!isxdigit(c) && c != 'x' && c != 'X' && c != 'l' && c != 'L'
               && c != 'u' && c != 'U')
      {
        /* the constant is over; the terminator may itself begin a
           comment, string or identifier, so classify it */
        OutputFormat(output->scheme.constant_e, output);
        state = NORMAL;
        goto classify_normal;
      }
      break;

    case NUMBER_AFTER_DOT:
      if (!isdigit(c))
      {
        OutputFormat(output->scheme.constant_e, output);
        state = NORMAL;
        goto classify_normal;
      }
      break;

    case IDENTIFIER:
      if ((isalnum(c) || c == '_') && nident < MAXIDENT)
      {
        ident[nident++] = c;
        ident[nident] = '\0';
        write = 0;
      }
      else
      {
        /* flush the collected identifier, highlighted if a keyword */
        if (StringArrayFind(&keywords, ident))
        {
          OutputFormat(output->scheme.keyword_b, output);
          OutputString(ident, output);
          OutputFormat(output->scheme.keyword_e, output);
        }
        else
          OutputString(ident, output);
        /* the terminator may start a comment or string: classify it */
        state = NORMAL;
        goto classify_normal;
      }
      break;

    default:
      fprintf(stderr, "We shouldn't be here!\n");
    }

    if (write)
      OutputChar(c, output);
  }

  OutputFormat(output->scheme.code_e, output);
  fclose(fp);

  /* release the keyword list */
  for (i = 0; i < keywords.num; ++i)
    free(keywords.strings[i]);
  free(keywords.strings);

  return 1;
}

/*********************************************************************
 * OutputChar
 *
 * Writes a single character, escaping it if necessary
 *
 * '<', '>', '&' and '|' are written through the scheme's specialchar
 * string, which is used as a printf format taking the character as
 * its argument. Spaces, tabs and newlines are replaced by the
 * scheme's space and newline strings; tabs expand to the next
 * multiple of the scheme's tabwidth. The column counter in `output'
 * is kept up to date.
 */

void
OutputChar(char c, OutputFile *output)
{
  int tabwidth;

  switch (c)
  {
  case '<':
  case '>':
  case '&':
  case '|':
    fprintf(output->file, output->scheme.specialchar, c);
    ++output->column;
    break;

  case ' ':
    fprintf(output->file, "%s", output->scheme.space);
    ++output->column;
    break;

  case '\t':
    /* a zero or negative tab width would divide by zero below */
    tabwidth = output->scheme.tabwidth > 0 ? output->scheme.tabwidth : 1;
    do
    {
      fprintf(output->file, "%s", output->scheme.space);
      ++output->column;
    } while ((output->column % tabwidth) != 0);
    break;

  case '\n':
    fprintf(output->file, "%s", output->scheme.newline);
    output->column = 0;
    break;

  default:
    fputc(c, output->file);
    ++output->column;
  }
}

/*********************************************************************
 * OutputString
 *
 * Writes a string, escaping characters as necessary
 */

void
OutputString(const char *str, OutputFile *output)
{
  const char *p;

  for (p = str; *p != '\0'; ++p)
    OutputChar(*p, output);
}

/*********************************************************************
 * OutputFormat
 *
 * Writes a format string verbatim (no escaping, no column tracking)
 */

void
OutputFormat(const char *str, OutputFile *output)
{
  fputs(str, output->file);
}

/*********************************************************************
 * StringArrayRead
 *
 * Reads a file of strings (one string per line) into the
 * StringArray structure. The array is then sorted.
*/

/*
 * array    - receives the strings; any previous contents are overwritten
 *            without being freed
 * filename - file to read, located with FOpenOnPath
 *
 * Returns 1 on success and 0 when the file cannot be opened, in which
 * case the array is left empty (num == 0, strings == NULL). Blank
 * lines are skipped. Lines longer than MAXLINE - 1 characters are
 * split into several entries. Running out of memory is fatal.
 */
int
StringArrayRead(StringArray *array, const char *filename)
{
  const int blocksize = 8;      /* number of extra strings to add when resizing */
  int space;                    /* the number of strings that can now fit */
  char line[MAXLINE];           /* current line read from the FILE */
  FILE *fp;                     /* pointer to the token FILE */

  /* initialise the array to contain no elements */
  array->num = 0;
  array->strings = NULL;

  /* open the file of strings first, so a failure allocates nothing */
  fp = FOpenOnPath(filename, "rt");
  if (fp == NULL)
    return 0;

  space = blocksize;
  array->strings = malloc(space * sizeof *array->strings);
  if (array->strings == NULL)
  {
    fprintf(stderr, "Out of memory\n");
    exit(1);
  }

  /* Read until fgets fails. Testing feof() before reading would run
     the body once more on an empty buffer after the last line. */
  while (fgets(line, sizeof line, fp) != NULL)
  {
    char *copy;

    /* remove '\n' from the end, if present */
    line[strcspn(line, "\n")] = '\0';
    if (line[0] == '\0')
      continue;

    /* resize the array so that it contains enough space */
    if (array->num >= space)
    {
      char **grown = realloc(array->strings,
                             (space + blocksize) * sizeof *grown);
      if (grown == NULL)
      {
        fprintf(stderr, "Out of memory\n");
        exit(1);
      }
      array->strings = grown;
      space += blocksize;
    }

    /* add the string to the array */
    copy = malloc(strlen(line) + 1);
    if (copy == NULL)
    {
      fprintf(stderr, "Out of memory\n");
      exit(1);
    }
    strcpy(copy, line);
    array->strings[array->num] = copy;
    ++array->num;
  }
  fclose(fp);

  /* sort the array of strings */
  if (array->num > 0)
    qsort(array->strings, array->num, sizeof(char *), StringArrayCmp);

  return 1;
}

/*********************************************************************
 * StringArrayCmp
 *
 * Comparison function for StringArray sorting and searching. Both
 * arguments point at array elements, i.e. at pointers to strings.
 */

int
StringArrayCmp(const void *a, const void *b)
{
  const char *lhs = *(const char **) a;
  const char *rhs = *(const char **) b;

  return strcmp(lhs, rhs);
}

/*********************************************************************
 * StringArrayFind
 *
 * Finds a given string in the array, returns NULL if not found.
*/

/*
 * The array must be sorted with StringArrayCmp, because the lookup is
 * a binary search. On a match the stored string is returned, not the
 * `match' argument.
 */
char *
StringArrayFind(StringArray *array, const char *match)
{
  char **found;

  /* nothing to search; also keeps a NULL base away from bsearch */
  if (array->strings == NULL || array->num <= 0)
    return NULL;

  found = bsearch(&match, array->strings, array->num, sizeof(char *),
                  StringArrayCmp);
  return found != NULL ? *found : NULL;
}

/*********************************************************************
 * SchemeRead
 *
 * Reads the highlighting scheme to be used from the given file.
 *
 * Every field is first set to its default (tabwidth 2, all strings
 * empty), so the scheme is usable even when 0 is returned because the
 * file could not be opened. Recognised keys (compared without regard
 * to case) replace the defaults; unknown keys are ignored. Values
 * stored in the scheme are heap strings that are not freed here.
 * Returns 1 on success, 0 on failure (reported on stderr).
 */

int
SchemeRead(Scheme * scheme, const char *filename)
{
  FILE *fp;
  char *key;
  char *value;

  /* defaults first: a failed open must not leave the scheme unset */
  scheme->tabwidth = 2;
  scheme->keywords = "";
  scheme->space = "";
  scheme->newline = "";
  scheme->specialchar = "";
  scheme->code_b = "";
  scheme->code_e = "";
  scheme->comment_b = "";
  scheme->comment_e = "";
  scheme->directive_b = "";
  scheme->directive_e = "";
  scheme->constant_b = "";
  scheme->constant_e = "";
  scheme->keyword_b = "";
  scheme->keyword_e = "";

  fp = FOpenOnPath(filename, "rt");
  if (fp == NULL)
  {
    fprintf(stderr, "Unable to open highlight scheme file %s\n", filename);
    return 0;
  }

  while ((key = SchemeReadKey(fp)) != NULL)
  {
    value = SchemeReadValue(fp);

    if (strcasecmp(key, "tabwidth") == 0)
    {
      int width = atoi(value);

      /* tab expansion divides by the width, so it must be positive */
      if (width > 0)
        scheme->tabwidth = width;
      else
        fprintf(stderr, "Ignoring invalid tabwidth in highlight scheme file %s\n",
                filename);
      free(value);
    }
    else if (strcasecmp(key, "keywords") == 0)
      scheme->keywords = value;
    else if (strcasecmp(key, "space") == 0)
      scheme->space = value;
    else if (strcasecmp(key, "newline") == 0)
      scheme->newline = value;
    else if (strcasecmp(key, "specialchar") == 0)
      scheme->specialchar = value;
    else if (strcasecmp(key, "code_b") == 0)
      scheme->code_b = value;
    else if (strcasecmp(key, "code_e") == 0)
      scheme->code_e = value;
    else if (strcasecmp(key, "comment_b") == 0)
      scheme->comment_b = value;
    else if (strcasecmp(key, "comment_e") == 0)
      scheme->comment_e = value;
    else if (strcasecmp(key, "directive_b") == 0)
      scheme->directive_b = value;
    else if (strcasecmp(key, "directive_e") == 0)
      scheme->directive_e = value;
    else if (strcasecmp(key, "constant_b") == 0)
      scheme->constant_b = value;
    else if (strcasecmp(key, "constant_e") == 0)
      scheme->constant_e = value;
    else if (strcasecmp(key, "keyword_b") == 0)
      scheme->keyword_b = value;
    else if (strcasecmp(key, "keyword_e") == 0)
      scheme->keyword_e = value;
    else
      free(value);              /* unknown key: discard its value */

    free(key);
  }

  fclose(fp);
  return 1;
}

/*********************************************************************
 * SchemeReadKey
 *
 * Reads the next key name (for a key-value pair) from the file. A
 * key name starts with a '.' at the start of the line and ends with
 * a '=' character. The caller is responsible for freeing the string
 * returned by this function. Returns NULL when no further key is
 * found before the end of the file. Running out of memory is fatal.
 */

char *
SchemeReadKey(FILE *fp)
{
  int currentChar;
  int prevChar;
  char *key;
  int keyLength;
  int keyMax;

  /* skip everything until we find a line starting with a dot */
  prevChar = '\n';      /* when we begin we have just started a new line */
  while ((currentChar = fgetc(fp)) != EOF)
  {
    if (currentChar == '.' && prevChar == '\n')
      break;
    prevChar = currentChar;
  }
  if (currentChar == EOF)
    return NULL;

  /* create new string to hold the key we read in */
  keyMax = KEYCHUNK;
  keyLength = 0;
  key = malloc(keyMax);
  if (key == NULL)
  {
    fprintf(stderr, "Out of memory\n");
    exit(1);
  }

  /* read everything up until we hit a '=' (or the end of the file) */
  while ((currentChar = fgetc(fp)) != EOF && currentChar != '=')
  {
    /* add the character to the string, resizing as necessary; growing
       as soon as the buffer is full keeps room for the terminator */
    key[keyLength++] = (char) currentChar;
    if (keyLength == keyMax)
    {
      char *grown;

      keyMax += KEYCHUNK;
      grown = realloc(key, keyMax);
      if (grown == NULL)
      {
        fprintf(stderr, "Out of memory\n");
        exit(1);
      }
      key = grown;
    }
  }
  key[keyLength] = '\0';

  return key;
}

/*********************************************************************
 * SchemeReadValue
 *
 * Reads the next value (for a key-value pair) from the file. A value
 * follows on immediately after a key, and ends just before the
 * beginning of the next key (denoted by a '.' at the start of the
 * line). The caller is responsible for freeing the string returned by
 * this function.
*/

/*
 * Lines that begin with '#' (other than the text directly following
 * the key) are comments and are left out of the value. The newline
 * that precedes the next key, or a final newline at the end of the
 * file, is not part of the value. Running out of memory is fatal.
 */
char *
SchemeReadValue(FILE *fp)
{
  char *buf;            /* the value collected so far */
  int len = 0;          /* number of characters stored in buf */
  int cap = VALUECHUNK; /* current size of buf */
  int skipping = 0;     /* non-zero while inside a comment line */
  int last = 0;         /* the character read before ch */
  int ch;               /* the character just read */

  /* create new string to hold the value we read in */
  buf = malloc(cap);
  if (buf == NULL)
  {
    fprintf(stderr, "Out of memory\n");
    exit(1);
  }

  /* read everything up until we get a new line starting with a dot */
  for (ch = fgetc(fp); ch != EOF; last = ch, ch = fgetc(fp))
  {
    if (last == '\n' && ch == '.')
    {
      /* put back '.' character and remove previous newline from string */
      ungetc(ch, fp);
      --len;
      break;
    }

    /* a '#' in the first column starts a comment */
    if (last == '\n' && ch == '#')
      skipping = 1;

    if (!skipping)
    {
      /* add the character to the string, resizing as necessary */
      buf[len++] = ch;
      if (len == cap)
      {
        cap += VALUECHUNK;
        buf = realloc(buf, cap);
        if (buf == NULL)
        {
          fprintf(stderr, "Out of memory\n");
          exit(1);
        }
      }
    }

    /* a comment never extends past the end of its line */
    if (ch == '\n')
      skipping = 0;
  }

  /* if ended due to EOF, remove last blank line, if any */
  if (ch == EOF && last == '\n')
    --len;

  buf[len] = '\0';
  return buf;
}

/*********************************************************************
 * FOpenOnPath
 *
 * Like `fopen' but makes search through PATH environment variable.
*/

/*
 * fname - file name, tried as given before any searching is done
 * mode  - mode string passed to fopen
 *
 * Returns the open stream, or NULL when the file cannot be opened
 * either as given or in any directory listed in PATH. When
 * HAVE_SEARCHPATH is defined the search is delegated to searchpath();
 * otherwise PATH is split on PATH_DELIM and each non-empty directory
 * is tried in order, with DIRECTORY_DELIM inserted where needed.
 */
FILE *
FOpenOnPath(const char *fname, const char *mode)
{
#if defined(HAVE_SEARCHPATH)
  FILE *fp;
  char *path;

  /* First, try file name as supplied */
  fp = fopen(fname, mode);
  if (fp != NULL)
    return fp;

  /* Search the PATH */
  path = searchpath(fname);
  if (path == NULL)
    return NULL;

  return fopen(path, mode);
#else
  FILE *fp;
  const char *env, *startp, *endp;
  char *buf;

  /* First, try file name as supplied */
  fp = fopen(fname, mode);
  if (fp != NULL)
    return fp;

  env = getenv("PATH");
  if (env == NULL)
    return NULL;

  /* Worst case is the whole of PATH as a single directory, plus one
     inserted directory delimiter, plus fname, plus the terminator. */
  buf = malloc(strlen(env) + strlen(fname) + 2);
  if (buf == NULL)
    return NULL;

  /* Search the PATH */
  startp = env;
  do
  {
    size_t dirlen;

    /* Get next directory's path */
    endp = strchr(startp, PATH_DELIM);
    dirlen = (endp != NULL) ? (size_t) (endp - startp) : strlen(startp);
    memcpy(buf, startp, dirlen);
    buf[dirlen] = '\0';
    if (endp != NULL)
      startp = endp + 1;

    /* empty PATH entries are skipped */
    if (dirlen > 0)
    {
      if (buf[dirlen - 1] != DIRECTORY_DELIM)
        buf[dirlen++] = DIRECTORY_DELIM;
      strcpy(buf + dirlen, fname);      /* Append fname to path */
      fp = fopen(buf, mode);
      if (fp != NULL)
        break;                          /* Found and opened! */
    }
  } while (endp != NULL);

  free(buf);
  return fp;            /* still NULL when no directory matched */
#endif
}