• R/O
  • HTTP
  • SSH
  • HTTPS

タグ
未設定

よく使われているワード(クリックで追加)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

POSIX.1 National Language Support API for MinGW


ファイル情報

Rev. e8a4ca7c72694a37645e1013e561569c8dbb322b
サイズ 41,238 バイト
日時 2008-02-16 21:57:19
作者 Keith Marshall
ログメッセージ

MinGW-catgets version 1.0.1 released.
Source update only: correct `make install' defect.

内容

/*
 * mcsource.c
 *
 * $Id$
 *
 * Copyright (C) 2006, 2007, 2008, Keith Marshall
 *
 * This file implements the message catalogue source code parser, which is
 * used internally by `gencat', to compile message dictionaries.
 *
 * Written by Keith Marshall  <keithmarshall@users.sourceforge.net>
 * Last modification: 08-Jan-2008
 *
 *
 * This is free software.  It is provided AS IS, in the hope that it may
 * be useful, but WITHOUT WARRANTY OF ANY KIND, not even an IMPLIED WARRANTY
 * of MERCHANTABILITY, nor of FITNESS FOR ANY PARTICULAR PURPOSE.
 *
 * Permission is granted to redistribute this software, either "as is" or
 * in modified form, under the terms of the GNU General Public License, as
 * published by the Free Software Foundation; either version 2, or (at your
 * option) any later version.
 *
 * You should have received a copy of the GNU General Public License
 * along with this software; see the file COPYING.  If not, write to the
 * Free Software Foundation, 51 Franklin St - Fifth Floor, Boston,
 * MA 02110-1301, USA.
 *
 */
#define WIN32_LEAN_AND_MEAN
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

/* Win32 hosts don't have native support for `LC_MESSAGES',
 * which is required by POSIX, but MinGW may allow us to emulate it,
 * with this define *before* we source locale.h.
 */
#define MINGW32_LC_EXTENSIONS  MINGW32_LC_ENVVARS | MINGW32_LC_MESSAGES

#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <fcntl.h>
#include <unistd.h>
#include <locale.h>
#include <limits.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <nl_types.h>
#include <langinfo.h>

#include <gencat.h>
#include <gcmsgs.h>
#include <debug.h>

#include <platform.h>
#include <mcutfsig.h>

#ifdef DEBUG_BUFSIZ
# undef  BUFSIZ
# define BUFSIZ DEBUG_BUFSIZ
#endif

/* The following define the input states of the gencat parser; they
 * are bit flags, which are combined within the `status' word of the
 * parser.  The four low order bits are not flags; they are reserved,
 * (see CATEGORY below), to carry a small action code identifying the
 * directive, or the comment category, which is being processed.
*/
#define NEWLINE    0x0010   /* set at start of each logical input line  */
#define CONTINUED  0x0020   /* set if newline escaped on previous line  */
#define DIRECTIVE  0x0100   /* set while parsing a gencat directive     */
#define NUMERIC    0x0200   /* set while parsing a message/set number   */
#define MSGTEXT    0x0400   /* set while parsing message text           */
/*
 * NOTE(review): NEWMSG and FLUSH are assigned the same bit, (0x0800),
 * so setting or testing either one is indistinguishable from setting
 * or testing the other; confirm that this sharing is intentional.
 */
#define NEWMSG     0x0800   /* set when waiting to parse a new message  */
#define FLUSH      0x0800   /* set when message cache must be flushed   */
#define QUOTED     0x1000   /* set if the current message is quoted     */
#define ENCODED    0x2000   /* set while parsing locale encoded input   */
#define CONTEXT    0x4000   /* set when the message locale is active    */

/* The action code occupies the low order nibble of the status word;
 * `status & CATEGORY' isolates it, and `status & ~CATEGORY' clears it.
 */
#define CATEGORY   0x000F   /* mask to extract directive action codes   */

#define ADDSET     0x0001   /* action code set by `$set' directive      */
#define DELSET     0x0002   /* action code set by `$delset' directive   */
#define DEFQUOTE   0x0003   /* action code set by `$quote' directive    */

/* DEFCONV also lies within the CATEGORY nibble, but its value does not
 * coincide with any of the directive action codes defined above.
 */
#define DEFCONV    0x0008   /* comment category for codeset declaration */

/* CONTINUED is a specialised case of the more general ESCAPE mechanism,
 * so let them share a common control flag.
 */
#define ESCAPE     CONTINUED

static
int mc_directive( int status, const char *keyword )
{
  /* Translate a directive keyword into the parser state which is
   * needed to process the remainder of that directive.
   *
   * `status' is the current parser state word; `keyword' is the NUL
   * terminated directive name, without its leading `$'.  The return
   * value is the new state word: the bits of `status' selected by the
   * matching table entry's `keep' mask, with its `turn_on' bits added.
   */
  static const struct directive_entry
  {
    const char *name;     /* directive keyword; NULL in the sentinel   */
    int         keep;     /* bits of `status' which are to be retained */
    int         turn_on;  /* bits to be set, in addition to those kept */
  }
  known_directives[] =
  {
    /* Every `keep' mask discards the low order nibble, (where the
     * action code is stored), together with the DIRECTIVE flag; thus,
     * keyword recognition always ends the keyword collection state.
     */
    { "set",     0xfff0 & ~DIRECTIVE,  ADDSET   | NUMERIC },
    { "delset",  0xfff0 & ~DIRECTIVE,  DELSET   | NUMERIC },
    { "quote",   0xfff0 & ~DIRECTIVE,  DEFQUOTE | ENCODED },

    /* Sentinel: terminates the search, and provides the state change
     * applied for any keyword which is not listed above; no action
     * code, and no additional flag, is enabled in this case.
     */
    {  NULL,     0xfff0 & ~DIRECTIVE,   0               }
  };

  const struct directive_entry *entry;

  /* Step through the table until the keyword is matched, or until
   * the sentinel is reached; either way, `entry' is left referring
   * to the table row which describes the required state change.
   */
  for( entry = known_directives; entry->name != NULL; ++entry )
    if( strcmp( keyword, entry->name ) == 0 )
      break;

  return (status & entry->keep) | entry->turn_on;
}

static
char *mc_default_codeset( void )
{
  /* Establish the codeset to be assumed for a message catalogue, when
   * its message definition file does not declare one explicitly; this
   * is the codeset associated with the LC_MESSAGES category of the
   * locale selected by the environment.
   *
   * The return value is a copy, made by strdup(), of the codeset name
   * reported by nl_langinfo(); it is whatever strdup() returns, so it
   * may be NULL if that copy could not be allocated.
   *
   * Side effects: LC_MESSAGES is set from the environment, and LC_CTYPE
   * is left set to "C" on return, (it is NOT restored to whatever value
   * it may have had on entry).
   */
  char *result;
  char *messages_locale = setlocale( LC_MESSAGES, "" );

  /* If the environment doesn't name a usable locale, fall back to
   * whichever LC_MESSAGES locale is already in effect.
   */
  if( messages_locale == NULL )
    messages_locale = setlocale( LC_MESSAGES, NULL );

  /* nl_langinfo( CODESET ) is queried while LC_CTYPE is switched to
   * the LC_MESSAGES locale, and the answer is copied before LC_CTYPE
   * is changed again.
   */
  setlocale( LC_CTYPE, messages_locale );
  result = strdup( nl_langinfo( CODESET ));
  setlocale( LC_CTYPE, "C" );

  return result;
}

static
int mc_errout( const char *src, long linenum, const char *fmt, ... )
{
  /* Emit a diagnostic message on the standard error stream, tagged
   * with the source file name and line number to which it relates.
   *
   * `src' and `linenum' form the "src:linenum:" prefix; `fmt', and any
   * arguments which follow it, are interpreted as for printf().
   *
   * The return value is always EXIT_FAILURE, so that the caller may
   * report the error and record the failure status, (for example, by
   * assignment to `gencat_errno'), in a single statement.
   */
  va_list argv;

  /* The location tag comes first ...
   */
  fprintf( stderr, "%s:%ld:", src, linenum );

  /* ... then the text of the message itself.
   */
  va_start( argv, fmt );
  vfprintf( stderr, fmt, argv );
  va_end( argv );

  return EXIT_FAILURE;
}

static inline
off_t mc_workspace_wanted( int fd )
{
  /* Suggest an initial size, in bytes, for the workspace in which the
   * messages read from the input stream on `fd' are to be collected.
   *
   * In a normal build, this is the size which fstat() reports for
   * `fd', provided that the call succeeds and the size is greater than
   * zero; otherwise it is BUFSIZ.  When DEBUG is defined, the fstat()
   * query is compiled out, and the return value is always BUFSIZ;
   * (presumably so that a small DEBUG_BUFSIZ may be used to exercise
   * workspace growth -- TODO confirm).
   */
# ifndef DEBUG
  struct stat info;
  if( (fstat( fd, &info ) == 0) && (info.st_size > (off_t)(0)) )
  {
    dfprintf(( stderr, "allocate workspace: %lu bytes", (unsigned long)(info.st_size) ));
    return info.st_size;
  }
# endif
  /* BUFSIZ is an `int' valued constant; it must be cast to match the
   * `%lu' conversion, else the behaviour of the trace is undefined.
   */
  dfprintf(( stderr, "allocate default workspace: %lu bytes", (unsigned long)(BUFSIZ) ));
  return (off_t)(BUFSIZ);
}

static inline
size_t mc_add_escape( iconv_t *iconv_map, char *msgbuf, wchar_t code )
{
  /* Encode the single wide character `code' into the compiled message
   * stream, at the location addressed by `msgbuf'.
   *
   * `iconv_map' is not named explicitly within this function body;
   * presumably `iconv_wctomb' is a macro, which refers to it -- TODO
   * confirm.  The return value is the result of `iconv_wctomb'; callers
   * add it to their message offset only when it is greater than zero.
   */
  size_t encoded_len;

  dfprintf(( stderr, "add escape code: %0#4.4x", code ));
  encoded_len = iconv_wctomb( msgbuf, code );

  return encoded_len;
}

static inline
char *mc_update_workspace( char *buf, char *cache, unsigned int count )
{
  /* Copy `count' bytes of encoded text from `cache', (the source), to
   * `buf', (the destination, within the workspace where the compiled
   * messages are collected).
   *
   * The bytes are copied one at a time, in ascending address order.
   * The return value points to the byte immediately following the
   * last one written, i.e. to `buf + count'.
   */
  unsigned int copied;
# ifdef DEBUG
  char *origin = buf;
# endif

  for( copied = 0; copied < count; ++copied )
    buf[copied] = cache[copied];
  buf += count;

# ifdef DEBUG
  /* For the debugging trace only, NUL terminate the copied text, so
   * that it may be displayed as a string; note that this writes one
   * byte beyond the `count' bytes which were requested.
   */
  *buf = '\0';
  fprintf( stderr, "cache flush: %u byte%s%s", count, count ? (count == 1 ? ": " : "s: ") : "s", origin );
# endif
  return buf;
}

static inline
struct msgdict *mc_discard( struct msgdict *index, char *messages )
{
  /* Release all of the memory held by a partially built message
   * dictionary, when compilation of the current message catalogue
   * source file is to be abandoned.
   *
   * `index' is the head of the linked list of dictionary entries,
   * (it may be NULL); `messages' is the single block in which the
   * text of all indexed messages is collected, (it also may be NULL).
   *
   * The return value is always NULL, so that a caller may abort with
   * `return mc_discard( head, messages );'.  Note that only memory is
   * released here; no file descriptor is closed.
   */
  struct msgdict *entry, *successor;

  /* Each index entry was allocated individually, so each must be
   * freed individually; its link must be read before it is freed.
   */
  for( entry = index; entry != NULL; entry = successor )
  {
    successor = entry->link;
    free( entry );
  }

  /* The message text block is a separate allocation; free() accepts
   * a NULL pointer, so no guard is required here.
   */
  free( messages );
  return NULL;
}

struct msgdict *mc_source( const char *input )
{
# define CODESET_DECLARED  codeset_decl_src, codeset_decl_lineno
# define UTF_TYPE(ORDER)   8 * input_code_size, (ORDER)

  dinvoke( int dtrace = 0; )

  long accumulator;
  int fd, input_fd, count;
  char buf[BUFSIZ], keyword[64];
  char *id;

  unsigned int status = NEWLINE;
  unsigned int linenum = 0;

  unsigned int setnum = 0;
  unsigned int msgnum = 0;
  unsigned int xcount = 0;

  wchar_t quote = L'\0';

  struct msgdict *head = NULL;
  struct msgdict *tail = NULL;

  static char *codeset = NULL;
  static const char *codeset_decl_src = NULL;
  static unsigned int codeset_decl_lineno = 0;

  unsigned short input_encoding = 0, input_code_size;

  static iconv_t iconv_map[2] = {(iconv_t)(-1), (iconv_t)(-1)};
  char *messages; off_t msgloc, headroom;
  /*
   * This `shift' state index is used to control interpretation
   * of octal escape sequences in message text; for normal text
   * processing, it should be set to zero.
   */
  unsigned shift = 0;
  /*
   * Other shift states supported, (they define the number of bits
   * by which the accumulator must be shifted to the left, in order
   * to multiply it by the associated number base), are:--
   */
#  define OCTAL_SEQUENCE_DECODE        3
#  define HEXADECIMAL_SEQUENCE_DECODE  4

  /* We use `last_char' to keep track of the character parsed
   * in the most recently preceding cycle.  (This is required so
   * that we may explicitly handle CRLF line terminations, which are
   * to be considered as a single character code; Microsoft's `O_TEXT'
   * kludge cannot be used, because we may be running `gencat' as a
   * cross hosted tool, on a platform which doesn't support this).
   */
  wchar_t last_char = L'\0';

  /* Get a file descriptor for the input stream ...
   */
  const char *dev_stdin = "/dev/stdin";
  if( (strcmp( input, "-") == 0) || (strcasecmp( input, dev_stdin ) == 0) )
  {
    /* ... reading from standard input ...
     */
    input_fd = fd = STDIN_FILENO;
    input = dev_stdin;
  }
  /* ... or otherwise, from a named file ...
   */
  else if( (input_fd = fd = open( input, O_RDONLY | O_BINARY )) < 0 )
    /*
     * ... which we must be able to open, else we bail out.
     */
    return NULL;
  dfprintf(( stderr, "\n%s:new source file\n%s:", input, input ));

  /* Allocate the workspace, in which we will collect the text of the
   * messages to be compiled into the catalogue ...
   */
  if( (messages = mc_malloc( headroom = mc_workspace_wanted( fd ))) == NULL )
  {
    /* ... but release our input file descriptor, and bail out,
     * when we can't get sufficient memory.
     */
    close( input_fd );
    return NULL;
  }

  /* Parse the input stream ...
   */
  msgloc = (off_t)(0);
  while( (fd >= 0) && ((count = read( fd, buf, sizeof( buf ) )) > 0) )
  {
    /* ... for as long as there is text to be read ...
     */
    char *p = buf;
    int high_water_mark = count - ( count >> 2 );
    dfprintf(( stderr, "\n%s:%u:read %u byte%s", input, linenum, count, count == 1 ? "" : "s" ));

    if( input_encoding == 0 )
    {
      input_encoding = mc_utf_signature( buf );
      switch( input_code_size = input_encoding & UTF_CODE_UNIT_SIZE_MASK )
      {
	case 1:
	  if( (input_encoding & UTF_WITH_BYTE_ORDER_MARK) != 0 )
	  {
	    /*
	     * This is UTF-8 input encoding, affirmed by the presence of
	     * the byte order mark, (three bytes), which we must skip.
	     */
	    p += 3;
	    count -= 3;
	  }
	  break;

	case 2:
	case 4:
	  if( (input_encoding & UTF_WITH_BYTE_ORDER_MARK) != 0 )
	  {
	    /* This is either UTF-16, or UTF-32, also affirmed by the BOM,
	     * which occupies the first code unit, so skip it.
	     */
	    p += input_code_size;
	    count -= input_code_size;
	  }
	  break;

	default:
	  /*
	   * This isn't valid, for any recognisable codeset in the required
	   * POSIX Portable Character Set input context; diagnose, clean up,
	   * and bail out.
	   */
	  dfputc(( '\n', stderr ));
	  fprintf( errmsg( MSG_UTF_UNKNOWN ), input );
	  fprintf( errmsg( MSG_UTF_SIZE_ERROR ), input, input_code_size );
	  free( messages );
	  close( input_fd );
	  return NULL;
      }

      if( input_encoding > 1 )
      {
	/* We've detected a UTF input encoding, which implicitly specifies
	 * the codeset of the messages defined within this source file.
	 */
	char utf_byte_order = UTF_BYTE_ORDER( input_encoding );
	sprintf( keyword, "UTF-%d%cE", 8 * input_code_size, utf_byte_order );

	dfprintf(( stderr, "\n%s:", input ));
	dinvoke( if( (input_encoding & UTF_WITH_BYTE_ORDER_MARK) != 0 ) )
	  dfprintf(( stderr, "unicode byte order mark detected; " ));
	dfprintf(( stderr, "encoding identified as %s", keyword ));

	if( codeset != NULL ) 
	{
	  /* We could coalesce these two conditions into a single test,
	   * but we choose to nest them thus, to facilitate a possible
	   * future change, to support codeset alternation.
	   */
	  if( strcmp( keyword, codeset ) != 0 )
	  {
	    /* The detected UTF input encoding is not compatible with the
	     * previously declared codeset of the messages in the catalogue;
	     * diagnose, and skip this source file.
	     */
	    dfputc(( '\n', stderr ));
	    fprintf( errmsg( MSG_UTF_CODESET ), input, keyword );
	    fprintf( errmsg( MSG_HAD_CODESET ), CODESET_DECLARED, codeset );
	    free( messages );
	    close( input_fd );
	    return NULL;
	  }
	}

	else
	{
	  /* We don't yet have a codeset declaration; establish one implicitly,
	   * based on the identified input encoding.
	   */
	  id = strdup( keyword );
	  if( (codeset = map_codeset( iconv_map, id, "wchar_t" )) == NULL )
	  {
	    free( id );
	  }

	  else
	    codeset_decl_src = input;
	}
      }	
    }

    while( count > 0 )
    {
      /* ... scanning character by character,
       * through the entire content of the input buffer.
       */
      wchar_t c;
      int skip = 1;
      if( status & ENCODED )
      {
        /* We are parsing context which is defined in the codeset
         * of the current message catalogue locale, so ensure that
         * we have established an appropriate codeset mapping.
         */
        if( codeset == NULL )
        {
	  /* No codeset mapping is yet in place,
	   * so default to the codeset of the system locale.
	   */
          codeset = map_codeset( iconv_map, mc_default_codeset(), "wchar_t" );
	  codeset_decl_lineno = linenum;
	  codeset_decl_src = input;
        }
          
        /* Now we may safely interpret the input according to the
         * multibyte character codeset specified for the message catalogue,
         * transforming to the wide character domain, for local processing.
         */
        p += ((skip = iconv_mbtowc( &c, p, count )) > 0) ? skip : 0;

	/* For UTF-16 or UTF-32 input encodings, the `skip' count *must*
	 * match the codeset size, ...
	 */
	if( (input_code_size > 1) && (skip != input_code_size) )
	{
	  /* ... or we have a framing error; diagnose,
	   * and discard this input stream.
	   */
	  dfputc(( '\n', stderr ));
	  fprintf( errmsg( MSG_UTF_FRAME_ERROR ), input, linenum, codeset );
	  return mc_discard( head, messages );
	}
      }

      else
      {
        /* We are parsing context which is defined in the POSIX,
         * or "C" locale, so read single byte character sequences,
	 * but stripping out any padding NULs required to fill the
	 * input stream to a UTF-16 or UTF-32 framing boundary.
         */
	int utf_skip = input_code_size - 1;
	if( (utf_skip > 0) && ((input_encoding & UTF_BIG_ENDIAN) != 0) )
	{
	  /* Big-Endian Unicode should have padding NULs before the
	   * POSIX `C' locale byte required.
	   */
	  while( (*p == '\0') && utf_skip-- && count-- )
	    ++p;
	  if( (utf_skip > 0) || (*p == '\0') )
	  {
	    /* Diagnose and bail out, if the number of padding NULs
	     * wasn't what we expected.
	     */
	    dfputc(( '\n', stderr ));
	    fprintf( errmsg( MSG_UTF_FRAME_ERROR ), input, linenum, UTF_TYPE( 'B' ));
	    return mc_discard( head, messages );
	  }
	}
        c = (wchar_t)(*p++);
	if( (utf_skip > 0) && ((input_encoding & UTF_LITTLE_ENDIAN) != 0) )
	{
	  /* Little-Endian Unicode should have the padding NULs after
	   * this significant byte.
	   */
	  while( (*p == '\0') && utf_skip-- && count-- )
	    ++p;
	  if( (utf_skip > 0) || (*p == '\0') )
	  {
	    /* Diagnose and bail out, if the number of padding NULs
	     * wasn't what we expected.
	     */
	    dfputc(( '\n', stderr ));
	    fprintf( errmsg( MSG_UTF_FRAME_ERROR ), input, linenum, UTF_TYPE( 'L' ));
	    return mc_discard( head, messages );
	  }
	}
      }

      if( skip > 0 )
      {
        count -= skip;
        if( c == '\r' )
	  /*
	   * The current input character is a carriage return.
	   * This may simply be the lead byte of a CRLF line terminator
	   * in a CRLF format input file, but we will not know this until
	   * we examine the following input character; request a FLUSH,
	   * so we keep the workspace consistent, and defer processing
	   * this CR until the next cycle, (by which time, it will
	   * have been moved into `last_char').
	   */
	  status |= FLUSH;
	
	else if( status & NEWLINE )
        {
          /* We just started parsing a new input line ...
           * Increment the line number, reset the parser context,
           * and clear the set/message number accumulator.
           */
          ++linenum;
          status &= ~( DIRECTIVE | NUMERIC | CATEGORY );
          accumulator = 0;

          if( (status & (NEWLINE | CONTINUED)) == NEWLINE )
          {
            /* When this new line is NOT simply a logical continuation
             * of the previous line ...
             */
            dfprintf(( stderr, "\n\n%s:%d:new input record", input, linenum ));
            if( c == '$' )
            {
              /* '$' as the FIRST character of the logical line
               * means that this line is either a `gencat' directive,
               * or it's a comment.
               */
              status |= DIRECTIVE;
              id = keyword;
            }

            else if( isdigit( c ) )
            {
              /* This is a message definition line,
               * with the message identified by an explicit numeric key.
               */
              status |= NUMERIC;
              accumulator = c - L'0';
            }
          }

          else if( status & MSGTEXT )
          {
            /* When this new line IS a continuation line,
             * and it is part of a message, which is being defined,
             * then we need to include the current input character
             * as part of the message definition.
             */
            if( c == quote )
            {
              dfprintf(( stderr, "\n%s:%u:%s quoted context", input, linenum, (status & QUOTED) ? "end" : "begin" ));
              status = (status ^ QUOTED) | FLUSH;
            }

            else
            {
              xcount += skip;
	      dinvoke(( dtrace = dtrace ? dtrace : fprintf( stderr, "\n%s:%u:scan input: ", input, linenum ) ));
              dfputc(( c, stderr ));
            }
          }

          /* Now, we dealt with the new line conditions,
           * so clear the related NEWLINE and CONTINUATION flags.
           */
          status &= ~( NEWLINE | CONTINUED );
        }

        else if( status & DIRECTIVE )
        {
          /* The input parser is in the directive identification state,
           * which persists until a space character marks the end of the
           * directive identifying keyword.
           */
          if( isspace( c ) )
          {
            /* We found the keyword delimiting space ...
            */
            if( id == keyword )
            {
              /* But, we didn't find any keyword ...
               *
               * This is a comment line, but it may be the special case of
	       * a codeset declaration comment, so we can't simply ignore it;
	       * set the comment state, to parse any codeset assignment.
               */
	      status = (status & ~CATEGORY) | DEFCONV;
              dfprintf(( stderr, "\n%s:%u:record type: comment", input, linenum ));
            }

            else if( (status & CATEGORY) == DEFCONV )
	    {
	      status &= ~(DIRECTIVE | DEFCONV);
	      if( strncmp( "codeset=", keyword, 8 ) == 0 )
	      {
		*id = '\0';
		id = strdup( keyword + 8 );
		if( codeset == NULL )
		{
		  if( (codeset = map_codeset( iconv_map, id, "wchar_t" )) == NULL )
		  {
		    free( id );
		  }

		  else
		  {
		    codeset_decl_lineno = linenum;
		    codeset_decl_src = input;
		  }
		}

		else
		{
		  if( strcmp( codeset, id ) != 0 )
		  {
		    dfputc(( '\n', stderr ));
		    fprintf( errmsg( MSG_CODESET_CLASH ), input, linenum, id );
		    fprintf( errmsg( MSG_HAD_CODESET ), CODESET_DECLARED, codeset );
		  }
		  free( id );
		}
		dfprintf(( stderr, "\n%s:%u:declare %s", input, linenum, keyword ));
	      }
	    }

	    else
            {
              /* This line has the format of a gencat directive.
	       * We have identified a possible match for a directive keyword;
               * identify it, and establish its associated parser state.
               */
              *id = '\0';
              status = mc_directive( status, keyword );
              dfprintf(( stderr, "\n%s:%u:record type: directive: %s", input, linenum, keyword ));
              dfprintf(( stderr, ": (status = %#4.4x)", status ));
            }
          }

          else
          {
            /* We are still parsing a potential directive keyword;
             * add the current character to the keyword parse buffer.
             */
            if( (id - keyword) < (sizeof( keyword ) - 1) )
              *id++ = c;
          }
        }

        else if( status & NUMERIC )
        {
          /* We are parsing a numeric value ...
           */
          if( isdigit( c ) )
          {
            /* ... and the current character is part of the number,
             * so add it into the accumulator.
             */
	    accumulator = accumulator * 10 + c - L'0';
          }

          else if( isspace( c ) )
          {
            /* We have reached the end of the number,
             * so hand it off as a set number, or a message number,
             * and process as appropriate.
             */
            switch( status & CATEGORY )
            {
              case ADDSET:
                /*
                 * Invoked by a "set" directive,
                 * open a new numbered message set within the catalogue ...
                 */
                dfprintf(( stderr, ": add set with id = %ld", accumulator ));
                if( accumulator > setnum )
                {
                  /* POSIX requires "set" directive entries to be presented
                   * in strictly ascending, (but not necessarily contiguous),
                   * "setnum" order, within the message source file.
                   *
                   * This entry satisfies the ascending "setnum" requirement,
                   * so we can simply create a new message set with this "setnum",
                   * and reset the "msgnum", for the start of a new set.
                   */
                  setnum = accumulator;
                  msgnum = 0;
                }

                else
                {
                  /* This "setnum" entry DOESN'T satisfy the ascending order rule,
                   * so complain, and bail out.
                   */
                  dfputc(( '\n', stderr ));
                  gencat_errno = mc_errout( FATAL( MSG_SETNUM_NOT_INCR ), setnum, accumulator );
		  close( input_fd );
		  return NULL;
                }
                break;

              case DELSET:
                /*
                 * Invoked by a "delset" directive,
                 * mark a numbered message set for deletion from the catalogue.
                 */
                dfprintf(( stderr, ": delete set with id = %ld", accumulator ));
		if( (accumulator > 0) && (accumulator <= NL_SETMAX) )
		{
                  struct msgdict *this;
                  if( (this = mc_malloc( sizeof( struct msgdict ))) != NULL )
                  {
                    /* We successfully created an empty dictionary slot,
		     * so fill it in as a `delset' request entry.
		     */
		    this->src = input;
		    this->lineno = linenum;
		    this->base = NULL;
		    this->set = accumulator;
		    this->msg = 0;
                    if( head == NULL )
                    {
                      /* The catalogue currently contains no records,
                       * so simply insert this as the first one.
                       */
                      head = tail = this;
                      this->link = NULL;
                    }

                    else
                    {
                      /* We've already added some message records,
                       * so the new one must be added at the end.
                       */
                      this->link = tail->link;
                      tail->link = this;
                      tail = this;
                    }
		  }
		}
                break;

              default:
                dfprintf(( stderr, "\n%s:%u:record type: message with id = %ld", input, linenum, accumulator ));
                if( accumulator > msgnum )
                {
                  /* POSIX also requires "msgnum" to be greater than any previous
                   * message defined in the current set; this declaration satisfies
                   * this requirement, so add a new message to the catalogue.
                   */
                  struct msgdict *this;
                  if( (this = mc_malloc( sizeof( struct msgdict ))) != NULL )
                  {
                    /* We successfully created an empty dictionary slot,
                     * so we may proceed to complete the message details.
                     * The message must be assigned to a numbered set, so
                     * first check that one has been opened; if not, we
                     * simply open the default set.
                     */
                    if( setnum == 0 )
                      setnum = NL_SETD;

                    /* We may now complete the message details in the new
                     * dictionary slot, and commit the record to the catalogue.
		     * Note that, if the message number tag in the source file
		     * is on an otherwise empty line, and is *immediately*
		     * followed by a newline, with no intervening space,
		     * then this message should be deleted; we flag this
		     * by setting `this->base = NULL'.  In all other cases,
		     * the message is to be placed into the catalogue, so
		     * we set 'this->base = messages'.
                     */
		    this->src = input;
		    this->base = (c == L'\n') ? NULL : messages;
		    this->lineno = linenum;
                    this->set = setnum;
                    this->msg = msgnum = accumulator;
                    this->loc = msgloc;
                    if( head == NULL )
                    {
                      /* The catalogue currently contains no records,
                       * so simply insert this as the first one.
                       */
                      head = tail = this;
                      this->link = NULL;
                    }

                    else
                    {
                      /* We've already added some message records,
                       * so the new one must be added at the end.
                       */
                      this->link = tail->link;
                      tail->link = this;
                      tail = this;
                    }
                  }
                }

                else
                {
                  /* This doesn't satisfy the POSIX requirement that,
                   * within each set, messages must appear in strictly
                   * incrementing "msgnum" order, so complain, and
                   * bail out.
                   */
                  dfputc(( '\n', stderr ));
                  gencat_errno = mc_errout( FATAL( MSG_MSGNUM_NOT_INCR ), msgnum, accumulator );
		  close( input_fd );
		  return NULL;
                }
                status |= ( MSGTEXT | ENCODED );
            }
            status &= ~( NUMERIC | CATEGORY );
          }

          else
          {
            dfprintf(( stderr, "index (abnormally terminated): %ld", accumulator ));
            status &= ~NUMERIC;
          }
        }

        else if( (status & CATEGORY) == DEFQUOTE )
        {
          /* This is the normal use of the "quote" directive,
           * followed by one delimiting space, with the next following character 
           * defining the "quote" character to be used, or "none" if no other
           * character appears before end of line.
           */
          quote = (c == L'\n') ? L'\0' : c;
          dfprintf(( stderr, quote ? ": assigned as %#4.4x" : ": none assigned", quote ));
          status &= ~( CATEGORY | ENCODED );
        }

        else if( status & MSGTEXT )
        {
          /* We are compiling a message ...
	   */
	  if( shift )
	  {
	    /* The current input character is either part of an
	     * escaped octal digit sequence, or it terminates one.
	     */
	    switch( c )
	    {
	      case L'0' ... L'7':
		/*
		 * This is a continuation of the sequence ...
		 */
		accumulator = (accumulator << shift) + c - L'0';
		break;

	      default:
		/*
		 * This is the character immediately following
		 * an encoded octal digit sequence ...
		 */
		if( accumulator > 0 )
		{
		  /* if it is a valid, non-NUL character code,
		   * add it into the workspace ...
		   */
		  size_t len;
		  dfprintf(( stderr, "\n%s:%u:", input, linenum ));
		  len = mc_add_escape( iconv_map, messages + msgloc, accumulator );
		  if( len > (size_t)(0) )
		  {
		    /* ... adjusting `headroom' and `msgloc' accordingly.
		     */
	    	    headroom -= len;
    		    msgloc += len;
		  }
		}
		/* Cancel the shift state which brought us to here;
		 * its purpose has been satisfied.
		 */
		shift = 0;
	    }
	  }
	  /* Do not use `else' here; the `shift' state may have changed
	   * since the preceding check, in which case, we may also need
	   * to do this ...
	   */
	  if( shift == 0 )
	  {
	    /* Continue scanning the current input line,
	     * until we find the end-of-line marker.
	     */
	    if( c != L'\n' )
	    {
	      /* We haven't reached end-of-line yet ...
	       * Check for other characters with special significance.
	       */
	      if( last_char == L'\r' )
	      {
		size_t len;

		/* The previous character was a deferred carriage return,
		 * but it was *not* the lead byte in a CRLF line terminator,
		 * so we need to emit it into the message definition.
		 */
		dfprintf(( stderr, "\n%s:%u:", input, linenum ));
		len = mc_add_escape( iconv_map, messages + msgloc, L'\r' );
		if( len > (size_t)(0) )
		{
		  headroom -= len;
		  msgloc += len;
		}
	      }

	      if( status & ESCAPE )
	      {
		/* The current input character was escaped ...
		 * Clear the ESCAPE flag, and interpret this case.
		 */
		size_t len = 0;
		status &= ~ESCAPE;
		dfprintf(( stderr, "\n%s:%u:", input, linenum ));
		switch( c )
		{
		  /* Thus, for the standard escape sequences ...
		   */
		  case L'b':
		    /*
		     * The "\b" escape sequence is to be interpreted as
		     * a literal backspace; encode it ...
		     */
		    len = mc_add_escape( iconv_map, messages + msgloc, L'\b' );
		    break;

		  case L'r':
		    /*
		     * Similarly for "\r", which is to be encoded as
		     * a carriage return ...
		     */
		    len = mc_add_escape( iconv_map, messages + msgloc, L'\r' );
		    break;

		  case L'n':
		    /*
		     * And for "\n", representing a newline ...
		     */
		    len = mc_add_escape( iconv_map, messages + msgloc, L'\n' );
		    break;

		  case L't':
		    /*
		     * ... "\t", representing a horizontal tab ...
		     */
		    len = mc_add_escape( iconv_map, messages + msgloc, L'\t' );
		    break;

		  case L'v':
		    /*
		     * ... "\v", representing a vertical tab ...
		     */
		    len = mc_add_escape( iconv_map, messages + msgloc, L'\v' );
		    break;

		  case L'f':
		    /*
		     * ... and "\f", representing a form feed.
		     */
		    len = mc_add_escape( iconv_map, messages + msgloc, L'\f' );
		    break;

		  case L'0' ... L'7':
		    /*
		     * This is the first in a "\ddd" octal digit sequence;
		     * initialise the accumulator, and activate the appropriate
		     * shift state, to capture the remaining digits.
		     */
		    accumulator = c - L'0';
		    shift = OCTAL_SEQUENCE_DECODE;
		    break;

		  default:
		    /*
		     * Anything else is not a special case; we can simply pass it
		     * through as a regular character.  Notice that we don't need
		     * to treat "\\" as special; this default action produces the
		     * desired effect.
		     */
		    xcount += skip;
		    dfprintf(( stderr, "pass through escape code: %0#4.4x", c ));
		}
		if( len > (size_t)(0) )
		{
		  /* Adjust the `headroom' counter, and the current `msgloc' offset,
		   * to account for the escape code we just added to the message buffer.
		   */
		  headroom -= len;
		  msgloc += len;
		}
	      }

	      else if( c == L'\\' )
	      {
		/* This is the escape character ...
		 * Set the parser flags, so that cached message data is flushed,
		 * and switch to ESCAPE mode, to interpret the next character.
		 */
		status |= FLUSH | ESCAPE;
	      }

	      else if( c == quote )
	      {
		/* This is the designated `quote' character ...
		 * Toggle the state of the quoted context indicator flag.
		 */
		dfprintf(( stderr, "\n%s:%u:%s quoted context", input, linenum, (status & QUOTED) ? "end" : "begin" ));
		status = (status ^ QUOTED) | FLUSH;
	      }

	      else
	      {
		/* This is just a regular character ...
		 * Schedule it for copying to the message buffer,
		 * when the next FLUSH is invoked.
		 */
		xcount += skip;
		dinvoke(( dtrace = dtrace ? dtrace : fprintf( stderr, "\n%s:%u:scan input: ", input, linenum ) ));
		dfputc(( c, stderr ));
	      }
	    }
	    if( count < ICONV_MB_LEN_MAX )
	    {
	      /* There may not be sufficient bytes in the input queue,
	       * to satisfy a fetch request for a potential multibyte sequence,
	       * so request a FLUSH now, so that the buffer may be replenished.
	       *
	       * Note that we must reset `skip', to avoid double accounting
	       * for content already scheduled for, but not yet copied to the
	       * message compilation buffer.
	       */
	      skip = 0;
	      status |= FLUSH;
	    }
	  }
	}

	if( c == L'\n' )
	{
	  /* Mark the end of the current input line,
	   * and schedule flushing of any pending message data from this line
	   * to the message collection buffer.
	   */
	  status |= NEWLINE | FLUSH;

	  /* If "QUOTED" context remains active, at the end of this line,
	   * then we have an implicit continuation, so force it.
	   */
	  if( (status & QUOTED) == QUOTED )
	    status |= CONTINUED;

	  /* Clean up the context of any pending directive processing.
	   */
	  switch( status & CATEGORY )
	  {
	    case DEFQUOTE:
	      /*
	       * If we see end of line with a DEFQUOTE pending, then
	       * there was no defining character with the "quote" directive,
	       * so we must disable "quote" character recognition.
	       */
	      quote = L'\0';
	      dfprintf(( stderr, ": none assigned" ));
	      break;
	  }

	  if( (status & CONTINUED) == 0 )
	  {
	    /* the following input line is not marked as a continuation,
	     * so its initial character *must* be interpreted as a member
	     * of the POSIX Portable Character Set.
	     */
	    status &= ~ENCODED;
	  }
	}
      }

      if( status & FLUSH )
      {
        /* We have pending message data in the input cache,
         * which now needs to be flushed to the output queue,
         * BEFORE we proceed to the next cycle.
         */
	dinvoke(( dtrace = 0 ));
        while( headroom < (xcount + ICONV_MB_LEN_MAX) )
        {
	  /* Ensure that the workspace includes sufficient free space
	   * to accommodate all content to be transferred, plus at least
	   * one additional maximum length multibyte character sequence.
	   * if not, expand it in `BUFSIZ' increments, until it does ...
	   */
          headroom += BUFSIZ;
          dfprintf(( stderr, "\n%s:%u:insufficient workspace remaining; grow allocation to %u bytes", input, linenum, (unsigned)(msgloc + headroom) ));
          if( (messages = realloc( messages, msgloc + headroom )) == NULL )
	  {
	    /* ... bailing out, if the required expansion fails.
	     */
            gencat_errno = mc_errout( FATAL( MSG_OUT_OF_MEMORY ));
	    close( input_fd );
	    return NULL;
	  }
        }
	/* Adjust the `headroom' counter, and the `msgloc' offset,
	 * to account for the content, as it is transferred.
	 */
        headroom -= xcount;
	dfprintf(( stderr, "\n%s:%u:", input, linenum ));
        msgloc = mc_update_workspace( messages + msgloc, p - xcount - skip, xcount )
               - messages;
	dfprintf(( stderr, "; %u byte%s free", headroom, headroom == 1 ? "" : "s" ));

        if( (status & (MSGTEXT | NEWLINE | CONTINUED)) == (MSGTEXT | NEWLINE) )
        {
	  /* We've found the end of a message definition record in our input,
	   * and it is not marked for continuation on the following input line;
	   * we must terminate the associated entry in our message buffer.
	   *
	   * Note that we *must* create a local variable to pass the terminator
	   * code; the `iconv_wctomb' macro needs to pass the *address* for
	   * this to the `iconv_wrap' function.
	   */
          wchar_t terminator = L'\0';
	  if( codeset == NULL )
	  {
	    /* No codeset mapping is yet in place,
	     * so default to the codeset of the system locale.
	     */
	    codeset = map_codeset( iconv_map, mc_default_codeset(), "wchar_t" );
	    codeset_decl_lineno = linenum;
	    codeset_decl_src = input;
	  }
	  /* Encode the terminator, and add it into the workspace ...
	   */
          xcount = iconv_wctomb( messages + msgloc, terminator );
          if( xcount >= 0 )
          {
	    /* ... adjusting `headroom' counter and `msgloc' offset accordingly.
	     */
            dfprintf(( stderr, "\n%s:%u:end of message; terminator added: %d byte(s)", input, linenum, xcount ));
            msgloc += xcount;
            headroom -= xcount;
          }
          else
          {
            dfprintf(( stderr, "\n%s:%u:end of message: add terminator failed", input, linenum ));
          }
          tail->len = msgloc - tail->loc;
	  status &= ~MSGTEXT;
        }
        status &= ~FLUSH;
        xcount = 0;

	/* Keep the input buffer filled,
	 * as we parse beyond the high water mark.
	 */
	if( (p - buf) > high_water_mark )
	{
	  int ref;
	  char *copyptr;
	  for( copyptr = buf; count; count-- )
	    *copyptr++ = *p++;
	  p = buf; ref = count = copyptr - p;
	  dfprintf(( stderr, "\n%s:%u:input count depleted: %u byte%s remaining", input, linenum, count, count == 1 ? "" : "s" ));
	  if( (fd >= 0)
	  &&  (ref == (count += read( fd, copyptr, sizeof( buf ) - count )))  )
	    fd = -1;
	  dfprintf(( stderr, "; read new input: count adjusted to %u byte%s", count, count == 1 ? "" : "s" ));
	  high_water_mark = count - ( count >> 2 );
	}
      }
      /* Make a note of the character code we have just parsed,
       * for possible deferred processing in the next cycle.
       */
      last_char = c;
    }
    dfprintf(( stderr, "\n%s:end of input; (count is now %d bytes)", input, count ));
  }
  /*
   * At the end of the current input file ...
   * Check that the parser finished in an appropriate termination state.
   */
  if( status & QUOTED )
  {
    /* Abnormal termination ...
     * EOF was encountered within a quoted literal, before the closing
     * quote was found; diagnose abnormal termination state.
     */
    fprintf( errmsg( MSG_EOF_IN_QUOTES ), input, linenum );
  }

  if( (status & NEWLINE) != NEWLINE )
  {
    /* Abnormal termination ...
     * The input file lacks a terminating newline; diagnose abnormal
     * termination state.
     */
    fprintf( errmsg( MSG_MISSING_NEWLINE ), input, linenum );
  }

  if( status & MSGTEXT )
  {
    /* Abnormal termination ...
     * EOF was encountered while parsing a continued message definition;
     * diagnose abnormal termination state, and mark incomplete message
     * for deletion.
     */
    fprintf( errmsg( MSG_TEXT_DISCARDED ), input, tail->lineno );
    tail->base = NULL;
  }

  /* After completing the construction of the message list,
   * adjust its allocated memory size to the actual size used,
   * then point all index entries to the resultant data block.
   */
  messages = mc_realloc( messages, (unsigned)(msgloc) );
  dfprintf(( stderr, "\n\nAllocation adjusted to %u bytes\n", (unsigned)(msgloc) ));
  for( tail = head; tail != NULL; tail = tail->link )
  {
    /* Just do this for all entries in the list!
     * Don't assume we can optimise by quitting if we find a reference
     * which is already mapped to the correct address; the list could
     * have moved, and subsequently have moved back to the old address,
     * in which case a later entry could be invalid.
     */
    if( tail->base != NULL )
      /*
       * Update index entries *except* those with a NULL base pointer;
       * (those which are NULL based indicate entities to be deleted!!!)
       */
      tail->base = messages;
    dfprintf(( stderr, "\nindex: %#08lx; text: %s", tail->key, messages + tail->loc ));
  }
  dfputc(( L'\n', stderr ));

  /* We are done with the current input source;
   * close its file descriptor, and return the message list.
   */
  close( input_fd );
  return head;
}

/* $RCSfile$Revision: 1.12 $: end of file */