/* ------------------------------------------------------------------------
@NAME       : post_parse.c
@DESCRIPTION: Operations applied to the AST (or strings in it) after 
              parsing is complete.
@GLOBALS    : 
@CALLS      : 
@CREATED    : 1997/01/12, Greg Ward (from code in bibparse.c, lex_auxiliary.c)
@MODIFIED   : 
@VERSION    : $Id: post_parse.c,v 1.3 1997/02/07 03:31:15 greg Exp greg $
-------------------------------------------------------------------------- */
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "error.h"
#include "prototypes.h"
#include "bibtex_ast.h"
#include "ast.h"

/*
 * Debugging level for this file.  Code guarded by "#if DEBUG > 1" (the
 * tracing printf() calls in postprocess_string()) is only compiled when
 * this is set to 2 or more.
 */
#define DEBUG 1


/* ------------------------------------------------------------------------
@NAME       : postprocess_string ()
@INPUT      : s                   - NUL-terminated string to process; a
                                    NULL pointer is ignored
              collapse_whitespace - if non-zero, collapse runs of spaces
              delete_quotes       - if non-zero, strip the enclosing quote
                                    characters
              convert_quotes      - if non-zero (and delete_quotes is zero),
                                    turn enclosing "..." into {...}
@OUTPUT     : s (modified in place according to the flags; the result is
              never longer than the input)
@RETURNS    : (void)
@DESCRIPTION: Make a pass over string s to do some subset of the following
              operations:
                 * collapse whitespace according to BibTeX rules
                 * delete quotation characters
                 * convert all quotes to curly braces

              (All are optional, and controlled by the obviously-named flag
              parameters.)

              Whitespace is collapsed according to these rules:
                 * whitespace after/before opening/closing quotes is deleted
                   (for an unquoted string: leading/trailing whitespace)
                 * within the string, each whitespace sequence is replaced by
                   a single space

              Only the space character is treated as whitespace here; the
              existing notes for this routine say that the lexer proper has
              already converted tabs and newlines to spaces.

              s is considered "quoted" if it is at least two characters
              long, starts with '"' or '{', and ends with '"' or '}'.
              Anything else (including the empty string and a lone quote
              character) is processed as an unquoted string, so the quote
              flags have no effect on it.

              If s is quoted but the opening and closing characters do not
              match, a lexical error is reported and processing carries on
              regardless.
@GLOBALS    : 
@CALLS      : strlen(), lexical_error()
@CREATED    : originally in lex_auxiliary.c; moved here 1997/01/12
@MODIFIED   : 
-------------------------------------------------------------------------- */
void 
postprocess_string (char *s,
                    int   collapse_whitespace,
                    int   delete_quotes,
                    int   convert_quotes)
{
   char   *src, *dst;           /* read and write cursors, both inside s */
   char   *end;                 /* one past the last `interesting' char */
   char   *first;               /* where the processed text starts */
   int     quoted;              /* is the string already quoted? */
   int     keep_quotes;         /* quoted, and the quotes are to stay */
   size_t  len;

   if (s == NULL)
      return;

#if DEBUG > 1
   printf ("postprocess_string: looking at >%s<\n", s);
#endif

   /*
    * Work out whether s is quoted.  The length test keeps s[len-1] inside
    * the string when s is empty, and stops a lone quote character from
    * being taken for both the opening and the closing quote.
    */

   len = strlen (s);
   quoted = (len >= 2) &&
            (s[0] == '"' || s[0] == '{') &&       /* quote char at start... */
            (s[len-1] == '"' || s[len-1] == '}'); /* ...and at the end */


   /* Now check that the quote characters match up */

   if (quoted &&
       ! ((s[0] == '"' && s[len-1] == '"') ||
          (s[0] == '{' && s[len-1] == '}')))
   {
      lexical_error ("token mismatch: string started with %c, " 
                     "but ended with %c",
                     s[0], s[len-1]);
   }

   keep_quotes = quoted && !delete_quotes;

   if (keep_quotes && convert_quotes && s[0] == '"')
   {
      s[    0] = '{';
      s[len-1] = '}';
   }


   /*
    * N.B. src and dst both point into s; src is always >= dst, and we
    * copy characters from src down to dst.  Whitespace is collapsed or
    * deleted by advancing src without advancing dst.
    *
    * The text to copy is [src,end): everything between the quotes if
    * quoted, else the whole string.  Bounding the scan by `end' rather
    * than by the NUL terminator means the cursors can never run off the
    * end of the string, and the closing quote (at *end when quoted) is
    * never overwritten before we are done with it.
    */

   src   = quoted ? (s + 1) : s;
   end   = quoted ? (s + len - 1) : (s + len);
   first = keep_quotes ? (s + 1) : s;
   dst   = first;


   /* Drop whitespace at the very start of the text */

   if (collapse_whitespace)
   {
      while (src < end && *src == ' ')
         src++;
   }

   while (src < end)
   {
      if (collapse_whitespace && *src == ' ')
      {
         /* A run of one or more spaces becomes exactly one space */

         *(dst++) = ' ';
         while (src < end && *src == ' ')
            src++;
      }
      else
      {
         *(dst++) = *(src++);
      }
   }


   /*
    * Mop up whitespace (if any) at the end of the text -- it has already
    * been collapsed to at most one space.  This is done before the closing
    * quote is put back, so it works whether or not quotes are kept.
    */

   if (collapse_whitespace && dst > first && *(dst-1) == ' ')
      dst--;


   /* Restore the closing quote (if appropriate) and terminate */

   if (keep_quotes)
      *(dst++) = *end;
   *dst = (char) 0;

#if DEBUG > 1
   printf ("                transformed to >%s<\n", s);
#endif

} /* postprocess_string */


/* ------------------------------------------------------------------------
@NAME       : paste_strings()
@INPUT      : field   - AST node whose children (field->down, linked via
                        ->right) are the items to concatenate
              options - supplies the collapse_whitespace and delete_quotes
                        flags for the final postprocess_string() call
              replace - if non-zero, the first child's text is replaced by
                        the pasted string and the remaining children are
                        freed
@OUTPUT     : field (children modified only if replace is non-zero)
@RETURNS    : the newly allocated, pasted string (NULL if allocation
              fails); if replace is non-zero the same pointer is also
              stored in the first child's text
@DESCRIPTION: Concatenates the text of all children of field into a single
              brace-quoted string, stripping the quotes of each quoted
              child, then runs postprocess_string() on the result.

              NOTE: this routine is currently compiled out (#if 0);
              postprocess_field() does the pasting itself.

              NOTE(review): the length pass uses macro_length() for
              AST_MACRO items, but the copy pass appends item->text as it
              stands -- presumably macros are expected to have been
              expanded in place already; confirm before re-enabling.
@GLOBALS    : 
@CALLS      : macro_length(), internal_error(), postprocess_string(),
              zzfree_ast()
@CREATED    : 1997/02/01, GPW (from code in postprocess_field())
@MODIFIED   : 
-------------------------------------------------------------------------- */
#if 0
char *paste_strings (AST *field, bt_options_t *options, int replace)
{
   int     tot_len;             /* total length of pasted string */
   char   *new_string;          /* the pasted string */
   size_t  len;
   AST    *item;

   tot_len = 0;
   new_string = NULL;

   /* 
    * If we're to concatenate (paste) sub-strings, we need to know the
    * total length of them.  (The total length includes whitespace but not
    * quotes, because quotes are deleted before pasting, but whitespace
    * isn't collapsed until after pasting.  The "- 2" term on macros and
    * strings is to account for to-be-deleted quotes.)
    */

   item = field->down;
   while (item)
   {
      switch (item->nodetype)
      {
         case AST_MACRO:
            tot_len += macro_length (item->text) - 2;
            break;
         case AST_STRING:
            tot_len += strlen (item->text) - 2;
            break;
         case AST_NUMBER:
            tot_len += strlen (item->text);
            break;
         default:
            internal_error ("field item has bad nodetype (%d)",
                            (int) item->nodetype);
            break;
      }
      item = item->right;
   }

   /* Now allocate the buffer in which we'll accumulate the whole string */

   tot_len += 2;             /* quotes for the concatenated string */
   new_string = (char *) calloc (tot_len+1, sizeof (char));
   if (new_string == NULL)
   {
      internal_error ("out of memory");
      return NULL;
   }
   new_string[0] = '{';


   /* And the main loop in which we append substrings to new_string */

   item = field->down;
   while (item)
   {
      len = strlen (item->text);

      /* 
       * If the string for this item is quoted, append only what lies
       * between the quotes; the opening quote is the FIRST character.
       */

      if (len >= 2 && (item->text[0] == '{' || item->text[0] == '"'))
      {
         assert (item->text[len-1] == '}' || item->text[len-1] == '"');
         strncat (new_string, item->text + 1, len - 2);
      }
      else                      /* otherwise just copy the item text */
      {
         strcat (new_string, item->text);
      }

      item = item->right;
   }

   /* 
    * We have to do one more postprocess_string() -- this time, to collapse
    * whitespace and delete quotes according to the caller's desire.  Also,
    * we (optionally) replace the list of items in the AST node with the
    * new string.
    */

   assert (strlen (new_string) == (size_t) (tot_len-1));
   new_string[tot_len-1] = '}';  /* calloc() already supplied the NUL */

   postprocess_string (new_string,
                       options->collapse_whitespace,
                       options->delete_quotes,
                       0);

   if (replace)
   {
      /* 
       * Delete all but first child of `field', and replace text for
       * first child with new_string.
       */

      item = field->down;       /* first child */
      zzfree_ast (item->right); /* free from second child on */
      item->right = NULL;
      free (item->text);        /* and replace the text */
      item->text = new_string;
   }

   return new_string;

} /* paste_strings() */
#endif


/* ------------------------------------------------------------------------
@NAME       : postprocess_field()
@INPUT      : field   - AST node for one field; its children (field->down,
                        linked via ->right) are the macro, string and
                        number items making up the field value
              options - paste_strings, expand_macros, collapse_whitespace
                        and delete_quotes control the processing
              replace - if non-zero, the results are written back into
                        the AST; otherwise the AST items are left alone
                        and private copies are processed
@OUTPUT     : field (field->text is always downcased in place; the
              children are modified only if replace is non-zero)
@RETURNS    : When pasting: the pasted string (NULL if it could not be
              allocated).  Otherwise: the processed text of the last
              child, or NULL if it has none.

              With replace non-zero the returned pointer is the text now
              held by the AST.  With replace zero it is a freshly
              allocated copy, except that the text of an AST_NUMBER item
              is always returned directly (no copy is made).
@DESCRIPTION: Downcases the field name, then runs each child through
              postprocess_string(), expanding macros if
              options->expand_macros is set.

              If options->paste_strings is set and the field has at least
              two children, the children are concatenated: each piece has
              its quotes deleted, the pieces are joined inside one pair of
              braces, and the whole is given a final postprocess_string()
              pass which collapses whitespace and deletes the braces as
              the options dictate.  Pasting without macro expansion is
              reported as an internal error.

              The pasted text is always wrapped in braces before that
              final pass, so braces or double quotes that belong to the
              text itself are never mistaken for the enclosing quotes.

              NOTE(review): when pasting with replace set, the first child
              keeps whatever nodetype it had (e.g. AST_NUMBER) although
              its text becomes the pasted string -- confirm whether it
              should be changed to AST_STRING.
@GLOBALS    : 
@CALLS      : strlwr(), macro_length(), macro_text(), internal_error(),
              postprocess_string(), zzfree_ast()
@CREATED    : 1997/01/10, GPW
@MODIFIED   : 
-------------------------------------------------------------------------- */
char *postprocess_field (AST *field, bt_options_t *options, int replace)
{
   AST    *item;
   int     pasting;
   int     collapse_whitespace;
   int     delete_quotes;
   int     tot_len;             /* total length of pasted string */
   size_t  used;                /* chars written to new_string so far */
   char   *new_string;          /* pasted string, or last item's text */
   int     new_owned;           /* not pasting: is new_string our copy? */
   char   *tmp_string;          /* processed text of the current item */
   int     tmp_owned;           /* is tmp_string a copy made here that */
                                /* the AST does not own? */

   strlwr (field->text);        /* downcase field name */

   /* 
    * We will paste strings iff the user wants us to, and there are at least
    * two children of field
    */

   pasting = (options->paste_strings) && (field->down) && (field->down->right);

   /* 
    * If we're to concatenate (paste) sub-strings, we need to know the
    * total length of them.  (The total length includes whitespace and 
    * quotes, even though we might delete quotes.  Better safe than sorry.)
    */

   tot_len = 0;                         /* these are out here to keep */
   used = 0;                            /* gcc -Wall happy */
   new_string = NULL;
   new_owned = 0;
   tmp_string = NULL;

   if (pasting)
   {
      item = field->down;
      while (item)
      {
         switch (item->nodetype)
         {
            case AST_MACRO:
               tot_len += macro_length (item->text);
               break;
            case AST_STRING:
               tot_len += (item->text) ? (strlen (item->text)) : 0;
               break;
            case AST_NUMBER:
               tot_len += (item->text) ? (strlen (item->text)) : 0;
               break;
            default:
               internal_error ("field item has bad nodetype (%d)",
                               (int) item->nodetype);
               break;
         }
         item = item->right;
      }

      /* Now allocate the buffer in which we'll accumulate the whole string */

      tot_len += 2;             /* quotes for the concatenated string */
      new_string = (char *) calloc (tot_len+1, sizeof (char));
      if (new_string == NULL)
      {
         internal_error ("out of memory");
         return NULL;
      }

      /*
       * Always open with a brace, whatever options->delete_quotes says:
       * the final postprocess_string() call strips the braces again if
       * they are not wanted.
       */

      new_string[0] = '{';
      used = 1;
   }


   /* 
    * Before entering the main loop, figure out just what
    * postprocess_string() is supposed to do -- eg. if pasting strings,
    * we should delete quotes but not (yet) collapse whitespace.
    */

   if (pasting)
   {
      collapse_whitespace = 0;
      delete_quotes = 1;
   }
   else
   {
      collapse_whitespace = options->collapse_whitespace;
      delete_quotes = options->delete_quotes;
   }

   if (pasting && !options->expand_macros)
   {
      internal_error ("can't paste strings without expanding macros");
   }

   /*
    * Now the main loop to process each string, and possibly tack it onto
    * new_string.
    */

   item = field->down;
   while (item)
   {
      tmp_string = NULL;
      tmp_owned = 0;

      if (item->nodetype == AST_MACRO && options->expand_macros)
      {
         tmp_string = macro_text (item);
         if (tmp_string != NULL)
         {
            tmp_string = strdup (tmp_string);
            if (tmp_string == NULL)
               internal_error ("out of memory");
            else
               postprocess_string (tmp_string, 
                                   collapse_whitespace,
                                   delete_quotes,
                                   0);
         }

         if (replace)
         {
            item->nodetype = AST_STRING;
            free (item->text);          /* hope this is ok... */
            item->text = tmp_string;    /* the AST owns the copy now */
         }
         else
         {
            tmp_owned = (tmp_string != NULL);
         }
      }

      else if (item->nodetype == AST_STRING && item->text)
      {
         if (replace)
         {
            tmp_string = item->text;    /* process the AST's text in place */
         }
         else
         {
            tmp_string = strdup (item->text);
            tmp_owned = (tmp_string != NULL);
         }

         if (tmp_string == NULL)
            internal_error ("out of memory");
         else
            postprocess_string (tmp_string, 
                                collapse_whitespace,
                                delete_quotes,
                                0);
      }

      else if (item->nodetype == AST_NUMBER && item->text)
      {
         tmp_string = item->text;       /* numbers are used as they are */
      }

      if (pasting)
      {
         if (tmp_string)
         {
            size_t piece = strlen (tmp_string);

            /* the "+ 1" keeps room for the closing brace added below */

            if (used + piece + 1 > (size_t) tot_len)
            {
               internal_error ("pasted string overflows its buffer");
            }
            else
            {
               memcpy (new_string + used, tmp_string, piece + 1);
               used += piece;
            }
         }

         /* a private copy has done its job once it has been appended */

         if (tmp_owned)
            free (tmp_string);
      }
      else
      {
         /* 
          * Only the last item's text is returned; don't leak the private
          * copy made for an earlier item.
          */

         if (new_owned)
            free (new_string);
         new_string = tmp_string;
         new_owned = tmp_owned;
      }

      item = item->right;
   }

   if (pasting)
   {
      assert (used + 1 <= (size_t) tot_len);   /* hope we alloc'd enough! */
      new_string[used++] = '}';
      new_string[used] = (char) 0;

      postprocess_string (new_string,
                          options->collapse_whitespace,
                          options->delete_quotes,
                          0);

      /* 
       * If replacing data in the AST, delete all but first child of
       * `field', and replace text for first child with new_string.
       */

      if (replace)
      {
         item = field->down;       /* first child */
         zzfree_ast (item->right); /* free from second child on */
         item->right = NULL;
         free (item->text);        /* and replace the text */
         item->text = new_string;
      }
   }

   return new_string;
   
} /* postprocess_field() */


/* ------------------------------------------------------------------------
@NAME       : postprocess_entry() 
@INPUT      : top     - AST node for one entry (may be NULL)
              options - processing options handed to postprocess_field();
                        ignored for macro definitions, which use a fixed
                        built-in set instead
@OUTPUT     : top (entry type downcased; every field processed in place)
@RETURNS    : (void)
@DESCRIPTION: Downcases the entry type, then calls postprocess_field()
              (with replace turned on) for each child of the entry,
              skipping a leading AST_KEY child if there is one.  An entry
              of type AST_MACRODEF is afterwards passed to add_macro().
@GLOBALS    : 
@CALLS      : strlwr(), postprocess_field(), add_macro()
@CREATED    : 1997/01/10, GPW
@MODIFIED   : 
-------------------------------------------------------------------------- */
void postprocess_entry (AST *top, bt_options_t *options)
{
   /*
    * Fixed options for the fields of a macro definition.  Intended
    * meaning, per the original author's note: keep quotes, expand other
    * macros, paste strings, and don't touch whitespace.  (The member
    * order of bt_options_t is declared elsewhere.)
    */
   static bt_options_t macro_options = { 0, 0, 1, 1, 0 };

   AST   *field;
   int    is_macrodef;

   if (top == NULL)             /* not even an entry at all! */
      return;

   strlwr (top->text);          /* downcase entry type */

   if (top->down == NULL)       /* no children at all */
      return;

   is_macrodef = (top->nodetype == AST_MACRODEF);
   if (is_macrodef)
      options = &macro_options;

   /* The fields start after the key, if the entry has one */

   field = top->down;
   if (field->nodetype == AST_KEY)
      field = field->right;

   for ( ; field != NULL; field = field->right)
      postprocess_field (field, options, 1);

   if (is_macrodef)
      add_macro (top);

} /* postprocess_entry() */
