/*
 * c2html.c
 *
 * Converts c and c++ code into HTML for publishing on the WWW
 * Copyright (C) 1996-1999 Christopher Kohlhoff (chris@kohlhoff.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#if defined(HAVE_DIR_H)
#include <dir.h>
#endif


/*********************************************************************
 * Structures and Definitions
 */

/* Maximum number of characters stored for one identifier
 * (the identifier buffer is declared with MAXIDENT + 1 bytes) */
#define MAXIDENT   512
/* Size of the line buffer used when reading the keywords file */
#define MAXLINE    512
/* Initial size and growth step, in bytes, of the buffer for a scheme key */
#define KEYCHUNK   16
/* Initial size and growth step, in bytes, of the buffer for a scheme value */
#define VALUECHUNK 32

/* StringArray structure for a sorted list of keywords.
 * Filled and sorted by StringArrayRead, searched by StringArrayFind. */
typedef struct StringArray
{
  char **strings;               /* heap-allocated array of heap-allocated strings */
  int num;                      /* number of strings currently stored */
}
StringArray;

/* Highlighting scheme to be used.
 * Populated by SchemeRead from a scheme file; the *_b / *_e pairs are
 * the texts written before and after the corresponding construct. */
typedef struct Scheme
{
  int tabwidth;                 /* tab stops are placed every tabwidth output columns */
  char *keywords;               /* name of the file listing the keywords, one per line */
  char *space;                  /* text written for a space (and for each column of a tab) */
  char *newline;                /* text written for a newline */
  char *specialchar;            /* printf-style format used for '<', '>', '&' and '|';
                                   the character is passed as its only argument */
  char *code_b;                 /* written once before the converted code */
  char *code_e;                 /* written once after the converted code */
  char *comment_b;              /* written before a comment */
  char *comment_e;              /* written after a comment */
  char *directive_b;            /* written before a preprocessor directive */
  char *directive_e;            /* written after a preprocessor directive */
  char *constant_b;             /* written before a number, character or string literal */
  char *constant_e;             /* written after a number, character or string literal */
  char *keyword_b;              /* written before an identifier found in the keyword list */
  char *keyword_e;              /* written after an identifier found in the keyword list */
}
Scheme;

/* Output file and related context */
typedef struct OutputFile
{
  Scheme scheme;                /* highlighting scheme applied to everything written */
  FILE *file;                   /* stream the converted text is written to */
  int column;                   /* current output column; reset to 0 by a newline and
                                   used to expand tabs to the next tab stop */
}
OutputFile;


/*********************************************************************
 * Function declarations
 */

/* Conversion driver and low-level output helpers */
int ConvertCodeToHtml(const char *filename, OutputFile *output);
void OutputChar(char c, OutputFile *output);
void OutputString(const char *str, OutputFile *output);
void OutputFormat(const char *str, OutputFile *output);

/* StringArray functions */
int StringArrayRead(StringArray *array, const char *filename);
char *StringArrayFind(StringArray *array, const char *match);
int StringArrayCmp(const void *a, const void *b);
/* fopen that also searches the directories listed in PATH */
FILE *FOpenOnPath(const char *fname, const char *mode);

/* Scheme functions */
int SchemeRead(Scheme * scheme, const char *filename);
/* both return heap-allocated strings that the caller must free */
char *SchemeReadKey(FILE *fp);
char *SchemeReadValue(FILE *fp);


/*********************************************************************
 * Program entry point
 *
 * Usage: c2html <filename> [<scheme file>]
 *
 * Loads the highlighting scheme (argv[2], or "c2html.scheme" when no
 * second argument is given), converts the source file named by
 * argv[1] and writes the result to stdout.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if the arguments are
 * missing, the scheme cannot be loaded or the conversion fails.
 */
int
main(int argc, char *argv[])
{
  OutputFile output;

  if (argc < 2)
  {
    fprintf(stderr, "Usage: c2html <filename> [<scheme file>]\n");
    return EXIT_FAILURE;
  }

  /* Initialise the output file. SchemeRead reports the failure itself;
   * stop here, because the scheme fields must not be relied upon after
   * a failed read and there is nothing meaningful to emit without one. */
  if (!SchemeRead(&output.scheme, argc > 2 ? argv[2] : "c2html.scheme"))
    return EXIT_FAILURE;
  output.file = stdout;
  output.column = 0;

  if (!ConvertCodeToHtml(argv[1], &output))
    return EXIT_FAILURE;

  return EXIT_SUCCESS;
}


/*********************************************************************
 * Machine States
 */

/* States of the lexer in ConvertCodeToHtml */
typedef enum State
{
  NORMAL,                       /* ordinary code, past the first non-blank character of the line */
  NORMAL_LINE_START,            /* only whitespace seen so far on this line; a '#' here starts a directive */
  HALF_COMMENT,                 /* a '/' was read in ordinary code and is being held back */
  C_COMMENT,                    /* inside a C-style comment */
  END_C_COMMENT,                /* inside a C-style comment, just after a '*' */
  CPP_COMMENT,                  /* inside a '//' comment; ends at the newline */
  DIRECTIVE_HALF_COMMENT,       /* a '/' was read inside a directive and is being held back */
  DIRECTIVE_C_COMMENT,          /* inside a C-style comment that began within a directive */
  DIRECTIVE_END_C_COMMENT,      /* as DIRECTIVE_C_COMMENT, just after a '*' */
  SINGLE_STRING,                /* inside a single-quoted literal */
  SINGLE_STRING_ESC,            /* inside a single-quoted literal, just after a backslash */
  DOUBLE_STRING,                /* inside a double-quoted literal */
  DOUBLE_STRING_ESC,            /* inside a double-quoted literal, just after a backslash */
  DIRECTIVE,                    /* inside a preprocessor directive; ends at the newline */
  DIRECTIVE_LINE_WRAP,          /* inside a directive, just after a backslash */
  NUMBER_BEFORE_DOT,            /* inside a numeric constant, no '.' seen yet */
  NUMBER_AFTER_DOT,             /* inside a numeric constant, after the '.' */
  IDENTIFIER                    /* collecting an identifier that may turn out to be a keyword */
}
State;


/*********************************************************************
 * ConvertCodeToHtml
 *
 * A State Machine that turns C or C++ code into HTML
 *
 * filename - name of the source file to convert
 * output   - destination stream together with the scheme to apply
 *
 * Returns 1 on success, or 0 (after writing a message to stderr) if
 * the input file or the keywords file named by the scheme cannot be
 * read.
 *
 * Each input character is fed to the machine. A state that discovers
 * the current token ended *before* this character emits the token,
 * switches state and sets `reprocess' so that the same character is
 * classified again in the new state. This is what lets a string or
 * comment directly follow an identifier or number, and what lets a
 * lone '/' (division) be recognised as not starting a comment.
 */
int
ConvertCodeToHtml(const char *filename, OutputFile *output)
{
  int c;                        /* the character just read */
  State state;                  /* the current state of the machine */
  int write;                    /* should we write the character just read */
  int reprocess;                /* should c be classified again in the new state */
  char ident[MAXIDENT + 1];     /* the current identifier being read */
  int nident;                   /* the position in the current identifier */
  StringArray keywords;         /* the array of keywords */
  int finished;                 /* whether we have finished processing the input */
  int i;                        /* index used when releasing the keywords */

  /* open the input FILE */
  FILE *fp = fopen(filename, "rt");
  if (fp == NULL)
  {
    fprintf(stderr, "Unable to open input file %s\n", filename);
    return 0;
  }

  /* read in the keywords */
  if (!StringArrayRead(&keywords, output->scheme.keywords))
  {
    fprintf(stderr, "Unable to read keywords file %s\n", output->scheme.keywords);
    fclose(fp);                 /* do not leak the input stream on this path */
    return 0;
  }

  OutputFormat(output->scheme.code_b, output);

  /* state-machine to output the code */
  state = NORMAL_LINE_START;
  nident = 0;
  ident[0] = '\0';
  finished = 0;
  while (!finished)
  {
    c = fgetc(fp);

    /* treat EOF as '\n' to ensure any current highlighting is ended */
    if (c == EOF)
    {
      c = '\n';
      finished = 1;
    }

    do
    {
      reprocess = 0;
      write = 1;
      switch (state)
      {
      case NORMAL_LINE_START:
        if (c == '#')
        {
          OutputFormat(output->scheme.directive_b, output);
          state = DIRECTIVE;
          break;
        }
        else if (!isspace(c))
          state = NORMAL;
        /* fall through */
      case NORMAL:
        if (c == '\n')
          state = NORMAL_LINE_START;
        else if (c == '/')
          state = HALF_COMMENT, write = 0;
        else if (isdigit(c))
        {
          OutputFormat(output->scheme.constant_b, output);
          state = NUMBER_BEFORE_DOT;
        }
        else if (isalpha(c) || c == '_')
        {
          nident = 0;
          ident[nident++] = (char) c;
          ident[nident] = '\0';
          state = IDENTIFIER;
          write = 0;
        }
        else if (c == '\'')
        {
          OutputFormat(output->scheme.constant_b, output);
          state = SINGLE_STRING;
        }
        else if (c == '\"')
        {
          OutputFormat(output->scheme.constant_b, output);
          state = DOUBLE_STRING;
        }
        break;

      case HALF_COMMENT:
        if (c == '/')
        {
          OutputFormat(output->scheme.comment_b, output);
          OutputChar('/', output);
          state = CPP_COMMENT;
        }
        else if (c == '*')
        {
          OutputFormat(output->scheme.comment_b, output);
          OutputChar('/', output);
          state = C_COMMENT;
        }
        else
        {
          /* the held-back '/' was an ordinary character (e.g. division):
           * emit it and classify c as normal code */
          OutputChar('/', output);
          state = NORMAL;
          reprocess = 1;
        }
        break;

      case C_COMMENT:
        if (c == '*')
          state = END_C_COMMENT;
        break;

      case END_C_COMMENT:
        if (c == '/')
        {
          OutputChar((char) c, output);
          OutputFormat(output->scheme.comment_e, output);
          state = NORMAL, write = 0;
        }
        else if (c != '*')
          state = C_COMMENT;
        break;

      case CPP_COMMENT:
        if (c == '\n')
        {
          OutputFormat(output->scheme.comment_e, output);
          state = NORMAL_LINE_START;
        }
        break;

      case DIRECTIVE_HALF_COMMENT:
        if (c == '/')
        {
          OutputFormat(output->scheme.directive_e, output);
          OutputFormat(output->scheme.comment_b, output);
          state = CPP_COMMENT;
        }
        else if (c == '*')
        {
          OutputFormat(output->scheme.directive_e, output);
          OutputFormat(output->scheme.comment_b, output);
          state = DIRECTIVE_C_COMMENT;
        }
        else
        {
          /* not a comment: let DIRECTIVE classify c, so that a newline
           * right after the '/' still closes the directive */
          state = DIRECTIVE;
          reprocess = 1;
        }
        /* the held-back '/' is emitted in every case */
        OutputChar('/', output);
        break;

      case DIRECTIVE_C_COMMENT:
        if (c == '*')
          state = DIRECTIVE_END_C_COMMENT;
        break;

      case DIRECTIVE_END_C_COMMENT:
        if (c == '/')
        {
          OutputChar((char) c, output);
          OutputFormat(output->scheme.comment_e, output);
          OutputFormat(output->scheme.directive_b, output);
          state = DIRECTIVE, write = 0;
        }
        else if (c != '*')
          state = DIRECTIVE_C_COMMENT;
        break;

      case SINGLE_STRING:
        if (c == '\'')
        {
          OutputChar((char) c, output);
          OutputFormat(output->scheme.constant_e, output);
          state = NORMAL;
          write = 0;
        }
        else if (c == '\\')
          state = SINGLE_STRING_ESC;
        break;

      case SINGLE_STRING_ESC:
        state = SINGLE_STRING;
        break;

      case DOUBLE_STRING:
        if (c == '\"')
        {
          OutputChar((char) c, output);
          OutputFormat(output->scheme.constant_e, output);
          state = NORMAL;
          write = 0;
        }
        else if (c == '\\')
          state = DOUBLE_STRING_ESC;
        break;

      case DOUBLE_STRING_ESC:
        state = DOUBLE_STRING;
        break;

      case DIRECTIVE:
        if (c == '\n')
        {
          OutputFormat(output->scheme.directive_e, output);
          state = NORMAL_LINE_START;
        }
        else if (c == '/')
        {
          state = DIRECTIVE_HALF_COMMENT;
          write = 0;
        }
        else if (c == '\\')
          state = DIRECTIVE_LINE_WRAP;
        break;

      case DIRECTIVE_LINE_WRAP:
        if (c == '\n' || !isspace(c))
          state = DIRECTIVE;
        break;

      case NUMBER_BEFORE_DOT:
        if (c == '.')
          state = NUMBER_AFTER_DOT;
        else if (!isxdigit(c) && c != 'x' && c != 'X' && c != 'l' && c != 'L' && c != 'u' && c != 'U')
        {
          OutputFormat(output->scheme.constant_e, output);
          state = NORMAL;
          reprocess = 1;
        }
        break;

      case NUMBER_AFTER_DOT:
        /* exponent and suffix letters belong to the constant (1.5e10, 2.0f) */
        if (!isdigit(c) && c != 'e' && c != 'E' && c != 'f' && c != 'F' && c != 'l' && c != 'L')
        {
          OutputFormat(output->scheme.constant_e, output);
          state = NORMAL;
          reprocess = 1;
        }
        break;

      case IDENTIFIER:
        if ((isalnum(c) || c == '_') && nident < MAXIDENT)
        {
          ident[nident++] = (char) c;
          ident[nident] = '\0';
          write = 0;
        }
        else
        {
          if (StringArrayFind(&keywords, ident))
          {
            OutputFormat(output->scheme.keyword_b, output);
            OutputString(ident, output);
            OutputFormat(output->scheme.keyword_e, output);
          }
          else
            OutputString(ident, output);
          state = NORMAL;
          reprocess = 1;
        }
        break;

      default:
        fprintf(stderr, "We shouldn't be here!\n");
        break;
      }
    }
    while (reprocess);

    if (write)
      OutputChar((char) c, output);
  }

  OutputFormat(output->scheme.code_e, output);

  fclose(fp);

  /* release the keyword list */
  for (i = 0; i < keywords.num; ++i)
    free(keywords.strings[i]);
  free(keywords.strings);

  return 1;
}


/*********************************************************************
 * OutputChar
 *
 * Writes a single character, escaping it if necessary
 *
 * c      - the character to write
 * output - destination stream, scheme and current column
 *
 * '<', '>', '&' and '|' are written through the scheme's specialchar
 * format, a space through the scheme's space text, a newline through
 * the scheme's newline text, and a tab as space texts up to the next
 * tab stop. Everything else is written unchanged. The column counter
 * advances by one per logical character and is reset by a newline.
 */
void
OutputChar(char c, OutputFile *output)
{
  int tabwidth;

  switch (c)
  {
  case '<':
  case '>':
  case '&':
  case '|':
    /* NOTE: specialchar is used as a printf format and comes from the
     * scheme file, so scheme files must be trusted input. */
    fprintf(output->file, output->scheme.specialchar, c);
    ++output->column;
    break;

  case ' ':
    fprintf(output->file, "%s", output->scheme.space);
    ++output->column;
    break;

  case '\t':
    /* a tab width of zero would make the modulo below divide by zero;
     * treat it as a width of one, i.e. a tab becomes a single space */
    tabwidth = output->scheme.tabwidth;
    if (tabwidth == 0)
      tabwidth = 1;
    do
    {
      fprintf(output->file, "%s", output->scheme.space);
      ++output->column;
    }
    while ((output->column % tabwidth) != 0);
    break;

  case '\n':
    fprintf(output->file, "%s", output->scheme.newline);
    output->column = 0;
    break;

  default:
    fputc(c, output->file);
    ++output->column;
    break;
  }
}


/*********************************************************************
 * OutputString
 *
 * Writes every character of a NUL-terminated string through
 * OutputChar, so each one is escaped exactly as a single character
 * would be.
 */
void
OutputString(const char *str, OutputFile *output)
{
  const char *cursor;

  for (cursor = str; *cursor != '\0'; ++cursor)
    OutputChar(*cursor, output);
}


/*********************************************************************
 * OutputFormat
 *
 * Writes a piece of scheme text to the output stream verbatim: no
 * escaping is applied and the column counter is left untouched.
 */
void
OutputFormat(const char *str, OutputFile *output)
{
  fputs(str, output->file);
}


/*********************************************************************
 * StringArrayRead
 *
 * Reads a file of strings (one string per line) into the
 * StringArray structure. The array is then sorted.
 *
 * array    - structure to fill; any previous contents are overwritten
 *            without being freed
 * filename - file to read, located with FOpenOnPath
 *
 * Returns 1 on success. Returns 0 if the file cannot be opened, in
 * which case the array is left empty (num == 0, strings == NULL).
 * Running out of memory terminates the program. Blank lines are
 * skipped and the line terminator (LF or CRLF) is removed. On
 * success the caller owns array->strings and every string in it.
 */
int
StringArrayRead(StringArray *array, const char *filename)
{
  const int blocksize = 8;      /* number of extra strings to add when resizing */
  int space = blocksize;        /* the number of strings that can now fit */
  char line[MAXLINE];           /* current line read from the FILE */
  FILE *fp;                     /* pointer to the token FILE */

  /* initialise the array to contain no elements */
  array->num = 0;
  array->strings = NULL;

  /* open the file of strings before allocating, so that nothing is
   * leaked when it does not exist */
  fp = FOpenOnPath(filename, "rt");
  if (fp == NULL)
    return 0;

  array->strings = malloc(space * sizeof(char *));
  if (array->strings == NULL)
  {
    fprintf(stderr, "Out of memory\n");
    exit(1);
  }

  /* read the file of strings into the array; testing the fgets result
   * (instead of feof) means an empty buffer is never inspected */
  while (fgets(line, MAXLINE, fp) != NULL)
  {
    char *copy;

    /* cut the line at its terminator, if any */
    line[strcspn(line, "\r\n")] = '\0';
    if (!*line)
      continue;

    /* resize the array so that it contains enough space */
    if (array->num >= space)
    {
      char **grown;

      space += blocksize;
      grown = realloc(array->strings, space * sizeof(char *));
      if (grown == NULL)
      {
        fprintf(stderr, "Out of memory\n");
        exit(1);
      }
      array->strings = grown;
    }

    /* add the string to the array */
    copy = malloc(strlen(line) + 1);
    if (copy == NULL)
    {
      fprintf(stderr, "Out of memory\n");
      exit(1);
    }
    strcpy(copy, line);
    array->strings[array->num++] = copy;
  }
  fclose(fp);

  /* sort the array of strings */
  qsort(array->strings, array->num, sizeof(char *), StringArrayCmp);

  return 1;
}


/*********************************************************************
 * StringArrayCmp
 *
 * qsort/bsearch callback for StringArray: a and b each point at a
 * `char *' element, and the strings they refer to are ordered with
 * strcmp.
 */
int
StringArrayCmp(const void *a, const void *b)
{
  const char *const *lhs = a;
  const char *const *rhs = b;

  return strcmp(*lhs, *rhs);
}


/*********************************************************************
 * StringArrayFind
 *
 * Binary-searches the (sorted) array for a string equal to match.
 * Returns the stored string on success, NULL if there is none.
 */
char *
StringArrayFind(StringArray *array, const char *match)
{
  char **hit;

  hit = bsearch(&match, array->strings, array->num,
                sizeof(char *), StringArrayCmp);
  return (hit != NULL) ? *hit : NULL;
}


/*********************************************************************
 * SchemeRead
 *
 * Reads the highlighting scheme to be used from the given file.
 *
 * scheme   - structure to fill in
 * filename - scheme file, located with FOpenOnPath
 *
 * Returns 1 on success, 0 (after writing a message to stderr) if the
 * file cannot be opened. The scheme is always left fully initialised:
 * every field starts from its default (tab width 2, empty strings)
 * and is overridden by the matching key in the file. Key names are
 * compared without regard to case and unknown keys are ignored.
 *
 * String fields point either at a string literal (the default) or at
 * heap memory obtained from SchemeReadValue, so callers must not
 * free them.
 */
int
SchemeRead(Scheme *scheme, const char *filename)
{
  FILE *fp;
  char *key;
  char *value;
  int width;

  /* set the defaults before anything can fail, so the caller never
   * sees indeterminate fields */
  scheme->tabwidth = 2;
  scheme->keywords = "";
  scheme->space = "";
  scheme->newline = "";
  scheme->specialchar = "";
  scheme->code_b = "";
  scheme->code_e = "";
  scheme->comment_b = "";
  scheme->comment_e = "";
  scheme->directive_b = "";
  scheme->directive_e = "";
  scheme->constant_b = "";
  scheme->constant_e = "";
  scheme->keyword_b = "";
  scheme->keyword_e = "";

  fp = FOpenOnPath(filename, "rt");
  if (fp == NULL)
  {
    fprintf(stderr, "Unable to open highlight scheme file %s\n", filename);
    return 0;
  }

  while ((key = SchemeReadKey(fp)) != NULL)
  {
    value = SchemeReadValue(fp);

    if (strcasecmp(key, "tabwidth") == 0)
    {
      /* atoi yields 0 for text that is not a number; a tab width that
       * is not positive cannot be used for tab stops, so keep the
       * current value instead */
      width = atoi(value);
      if (width > 0)
        scheme->tabwidth = width;
      else
        fprintf(stderr, "Ignoring invalid tabwidth \"%s\" in %s\n", value, filename);
      free(value);
    }
    else if (strcasecmp(key, "keywords") == 0)
      scheme->keywords = value;
    else if (strcasecmp(key, "space") == 0)
      scheme->space = value;
    else if (strcasecmp(key, "newline") == 0)
      scheme->newline = value;
    else if (strcasecmp(key, "specialchar") == 0)
      scheme->specialchar = value;
    else if (strcasecmp(key, "code_b") == 0)
      scheme->code_b = value;
    else if (strcasecmp(key, "code_e") == 0)
      scheme->code_e = value;
    else if (strcasecmp(key, "comment_b") == 0)
      scheme->comment_b = value;
    else if (strcasecmp(key, "comment_e") == 0)
      scheme->comment_e = value;
    else if (strcasecmp(key, "directive_b") == 0)
      scheme->directive_b = value;
    else if (strcasecmp(key, "directive_e") == 0)
      scheme->directive_e = value;
    else if (strcasecmp(key, "constant_b") == 0)
      scheme->constant_b = value;
    else if (strcasecmp(key, "constant_e") == 0)
      scheme->constant_e = value;
    else if (strcasecmp(key, "keyword_b") == 0)
      scheme->keyword_b = value;
    else if (strcasecmp(key, "keyword_e") == 0)
      scheme->keyword_e = value;
    else
      free(value);              /* unknown key: discard its value */

    free(key);
  }

  fclose(fp);

  return 1;
}


/*********************************************************************
 * SchemeReadKey
 *
 * Reads the next key name (for a key-value pair) from the file.
 *
 * Input is skipped up to the next '.' that is the first character of
 * a line (the current read position counts as a line start). The
 * key is everything after that dot up to, but not including, the
 * next '=' or the end of the file; the '=' itself is consumed.
 *
 * Returns a heap-allocated string that the caller must free, or NULL
 * when the end of the file is reached before a key is found. Running
 * out of memory terminates the program.
 */
char *
SchemeReadKey(FILE *fp)
{
  int ch;                       /* character just read */
  int last = '\n';              /* previous character; we begin at a line start */
  char *buf;                    /* the key being collected */
  int used = 0;                 /* characters stored in buf */
  int capacity = KEYCHUNK;      /* bytes allocated for buf */

  /* look for a dot in the first column */
  for (;;)
  {
    ch = fgetc(fp);
    if (ch == EOF)
      return NULL;
    if (ch == '.' && last == '\n')
      break;
    last = ch;
  }

  buf = malloc(capacity);
  if (buf == NULL)
  {
    fprintf(stderr, "Out of memory\n");
    exit(1);
  }

  /* collect characters up to the '=' */
  for (ch = fgetc(fp); ch != EOF && ch != '='; ch = fgetc(fp))
  {
    buf[used++] = ch;

    /* grow as soon as the buffer is full, which also keeps one byte
     * free for the terminator */
    if (used == capacity)
    {
      capacity += KEYCHUNK;
      buf = realloc(buf, capacity);
      if (buf == NULL)
      {
        fprintf(stderr, "Out of memory\n");
        exit(1);
      }
    }
  }

  buf[used] = '\0';
  return buf;
}


/*********************************************************************
 * SchemeReadValue
 *
 * Reads the next value (for a key-value pair) from the file.
 *
 * The value starts at the current read position, i.e. immediately
 * after a key, and ends just before the beginning of the next key
 * (a '.' at the start of a line) or at the end of the file. The
 * newline that precedes the next key, or that ends the file, is not
 * part of the value. A line whose first character is '#' is a
 * comment and is left out entirely, newline included.
 *
 * Returns a heap-allocated string that the caller must free. Running
 * out of memory terminates the program.
 */
char *
SchemeReadValue(FILE *fp)
{
  int ch;                       /* character just read */
  int last = 0;                 /* previous character */
  int skipping = 0;             /* non-zero while inside a comment line */
  int used = 0;                 /* characters stored in buf */
  int capacity = VALUECHUNK;    /* bytes allocated for buf */
  char *buf = malloc(capacity); /* the value being collected */

  if (buf == NULL)
  {
    fprintf(stderr, "Out of memory\n");
    exit(1);
  }

  for (ch = fgetc(fp); ch != EOF; last = ch, ch = fgetc(fp))
  {
    if (last == '\n' && ch == '.')
    {
      /* next key reached: hand the dot back and drop the newline
       * stored just before it */
      ungetc(ch, fp);
      --used;
      break;
    }

    if (last == '\n' && ch == '#')
      skipping = 1;

    if (!skipping)
    {
      buf[used++] = ch;

      /* grow as soon as the buffer is full, which also keeps one byte
       * free for the terminator */
      if (used == capacity)
      {
        capacity += VALUECHUNK;
        buf = realloc(buf, capacity);
        if (buf == NULL)
        {
          fprintf(stderr, "Out of memory\n");
          exit(1);
        }
      }
    }

    /* a comment lasts until the end of its line; checked after the
     * store above so that the comment's own newline is skipped too */
    if (ch == '\n')
      skipping = 0;
  }

  /* at the end of the file, drop the final newline, if any */
  if (ch == EOF && last == '\n')
    --used;

  buf[used] = '\0';
  return buf;
}


/*********************************************************************
 * FOpenOnPath
 *
 * Like `fopen' but makes search through PATH environment variable.
 *
 * fname - name of the file to open
 * mode  - fopen mode string
 *
 * The name is first tried exactly as supplied. If that fails, it is
 * looked up in the directories of PATH (via searchpath() where
 * available, otherwise by trying each non-empty PATH entry in turn).
 * Returns the open stream, or NULL if the file cannot be opened.
 */
FILE *
FOpenOnPath(const char *fname, const char *mode)
{
#if defined(HAVE_SEARCHPATH)
  FILE *fp;
  char *path;

  /* First, try file name as supplied */
  if ((fp = fopen(fname, mode)) != NULL)
    return fp;

  /* Search the PATH */
  path = searchpath(fname);
  if (path == NULL)
    return NULL;
  return fopen(path, mode);
#else
  FILE *fp;
  const char *env, *startp, *endp;
  char *buf;

  /* First, try file name as supplied */
  if ((fp = fopen(fname, mode)) != NULL)
    return fp;

  if ((env = getenv("PATH")) == NULL)
    return NULL;

  /* The longest candidate is the whole of PATH (when it holds a single
   * directory) + one directory separator + fname + the terminating NUL,
   * hence "+ 2" and not "+ 1". */
  if ((buf = malloc(strlen(env) + strlen(fname) + 2)) == NULL)
    return NULL;

  /* Search the PATH; fp is NULL here and stays NULL until a
   * candidate can be opened */
  startp = env;
  do
  {
    size_t dirlen;

    /* Get next directory's path */
    endp = strchr(startp, PATH_DELIM);
    dirlen = (endp != NULL) ? (size_t) (endp - startp) : strlen(startp);
    memcpy(buf, startp, dirlen);
    if (endp != NULL)
      startp = endp + 1;

    /* empty PATH entries are skipped */
    if (dirlen != 0)
    {
      if (buf[dirlen - 1] != DIRECTORY_DELIM)
        buf[dirlen++] = DIRECTORY_DELIM;
      strcpy(buf + dirlen, fname);      /* Append fname to path */
      if ((fp = fopen(buf, mode)) != NULL)
        break;                  /* Found and opened! */
    }
  }
  while (endp != NULL);

  free(buf);
  return fp;
#endif
}