[apple/icu.git] / icuSources / tools / genrb / read.c

/*
*******************************************************************************
*
*   Copyright (C) 1998-2008, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File read.c
*
* Modification History:
*
*   Date        Name        Description
*   05/26/99    stephen     Creation.
*   5/10/01     Ram         removed ustdio dependency
*******************************************************************************
*/

#include "read.h"
#include "errmsg.h"
#include "unicode/ustring.h"

/* Code points that the tokenizer in this file compares input characters
   against. */
#define OPENBRACE    0x007B  /* '{' */
#define CLOSEBRACE   0x007D  /* '}' */
#define COMMA        0x002C  /* ',' */
#define QUOTE        0x0022  /* '"' : opens and closes a quoted string */
#define ESCAPE       0x005C  /* '\\': introduces an escape sequence */
#define SLASH        0x002F  /* '/' : first character of a comment opener */
#define ASTERISK     0x002A  /* '*' : second character of a block-comment opener */
#define SPACE        0x0020  /* ' ' : inserted between merged strings */
#define COLON        0x003A  /* ':' */
#define BADBOM       0xFFFE  /* getNextToken() returns TOK_ERROR on this value;
                                presumably a byte-swapped U+FEFF -- confirm */
#define CR           0x000D  /* carriage return */
#define LF           0x000A  /* line feed */
               
/* Current line number of the input.  Set to 1 by resetLineNumber();
   incremented by isWhitespace() and isNewline() on U+000A / U+2029 and by
   getStringToken() for line ends inside quoted strings.  File-scope state
   shared by every function in this file. */
static int32_t lineCount;

/* Forward declarations of the file-local helpers defined further down. */

/* Reads a (possibly merged) string token whose first character is
   initialChar. */
static enum ETokenType getStringToken(UCHARBUF *buf,
                                      UChar32 initialChar,
                                      struct UString *token,
                                      UErrorCode *status);

/* Returns the next character outside comments; optionally skips whitespace. */
static UChar32 getNextChar           (UCHARBUF *buf, UBool skipwhite, struct UString *token, UErrorCode *status);
/* Consumes the rest of the current line. */
static void    seekUntilNewline      (UCHARBUF *buf, struct UString *token, UErrorCode *status);
/* Consumes input up to and including the closing asterisk-slash. */
static void    seekUntilEndOfComment (UCHARBUF *buf, struct UString *token, UErrorCode *status);
/* Character classifiers; both increment lineCount on U+000A and U+2029. */
static UBool   isWhitespace          (UChar32 c);
static UBool   isNewline             (UChar32 c);

/* Restart line counting: sets the file-scope lineCount back to 1, the
   number reported for tokens on the first line of input. */
void resetLineNumber() {
    lineCount = 1;
}

/* Read and return the next token from buf.
 *
 * Whitespace and comments in front of the token are skipped by
 * getNextChar(), which is also handed `comment` as the place to collect
 * the text of a documentation comment met while skipping.
 *
 * buf         input buffer to read from
 * token       receives the text when the result is TOK_STRING
 * linenumber  out: value of lineCount for the token; only written once
 *             the leading whitespace has been skipped successfully
 * comment     passed through to getNextChar()
 * status      in/out ICU error code; if it already indicates failure the
 *             function returns TOK_ERROR without reading anything
 *
 * Returns TOK_OPEN_BRACE, TOK_CLOSE_BRACE, TOK_COMMA or TOK_COLON for the
 * corresponding punctuation, TOK_EOF when getNextChar() reports U_EOF,
 * TOK_ERROR when *status indicates failure or the first character is
 * BADBOM, and otherwise whatever getStringToken() returns.  Adjacent
 * strings are merged into a single token by getStringToken(), so two
 * string tokens are never returned back to back.
 */
enum ETokenType getNextToken(UCHARBUF* buf,
                             struct UString *token,
                             uint32_t *linenumber, /* out: linenumber of token */
                             struct UString *comment,
                             UErrorCode *status) {
    UChar32         first;
    enum ETokenType kind;

    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    /* First character that is neither whitespace nor part of a comment. */
    first = getNextChar(buf, TRUE, comment, status);
    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    /* Line of the token start; this is the value reported for every
       single-character token below. */
    *linenumber = lineCount;

    if (first == BADBOM) {
        return TOK_ERROR;
    }
    if (first == OPENBRACE) {
        return TOK_OPEN_BRACE;
    }
    if (first == CLOSEBRACE) {
        return TOK_CLOSE_BRACE;
    }
    if (first == COMMA) {
        return TOK_COMMA;
    }
    if (first == U_EOF) {
        return TOK_EOF;
    }
    if (first == COLON) {
        return TOK_COLON;
    }

    /* Anything else starts a string.  Reading it may advance lineCount,
       so the reported line is refreshed afterwards. */
    kind = getStringToken(buf, first, token, status);
    *linenumber = lineCount;
    return kind;
}

/* Copy a string token into `token`, merging any adjacent strings into it.
 *
 * On entry the first character of the token (initialChar) has already
 * been read; it is not whitespace but may be a QUOTE or an ESCAPE.  The
 * function reads the rest of that string and then looks ahead, past
 * whitespace and comments: if another string follows, it is read as well
 * and appended to the same token.  Two adjacent quoted strings are joined
 * with nothing in between; in every other combination a single SPACE is
 * inserted between the pieces.
 *
 * buf          input buffer to read from
 * initialChar  first character of the token, already consumed
 * token        receives the text; its length is reset to 0 first
 * status       in/out ICU error code
 *
 * Returns TOK_STRING when a delimiter ('{', '}', ',' or ':') ends the
 * token (the delimiter is pushed back with ucbuf_ungetc) or when the
 * input ends inside an unquoted string; TOK_EOF when the input ends
 * inside a quoted string; TOK_ERROR when *status indicates failure on
 * entry or after an append, when unescape() reports an error, or when a
 * quoted/unquoted mix is seen while isStrict() is true.  Note that a
 * failing status observed right after getNextChar() yields TOK_STRING,
 * not TOK_ERROR.
 */
static enum ETokenType getStringToken(UCHARBUF* buf,
                                      UChar32 initialChar,
                                      struct UString *token,
                                      UErrorCode *status) {
    UBool    lastStringWasQuoted;
    UChar32  c;
    /* Scratch buffer filled by U_APPEND_CHAR32 and then appended to token. */
    UChar    target[3] = { '\0' };
    UChar    *pTarget   = target;
    /* Length handed to ustr_uscat(); reset to 0 after each append. */
    int      len=0;
    /* TRUE when the character just processed was an ESCAPE that unescape()
       left as ESCAPE; the next character is then taken literally. */
    UBool    isFollowingCharEscaped=FALSE;
    /* TRUE when the CR/LF in c came out of unescape(); such a character
       does not advance lineCount. */
    UBool    isNLUnescaped = FALSE;
    /* Previous character inside a quoted string; a CR or LF directly after
       a CR is not counted as a further line. */
    UChar32  prevC=0;

    /* We are guaranteed on entry that initialChar is not a whitespace
       character. If we are at the EOF, or have some other problem, it
       doesn't matter; we still want to validly return the initialChar
       (if nothing else) as a string token. */

    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    /* setup */
    lastStringWasQuoted = FALSE;
    c = initialChar;
    ustr_setlen(token, 0, status);

    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    /* One iteration per string piece; c holds the first character of the
       piece. */
    for (;;) {
        if (c == QUOTE) {
            /* Quoted piece.  A separating SPACE is needed only when the
               previous piece was unquoted. */
            if (!lastStringWasQuoted && token->fLength > 0) {
                ustr_ucat(token, SPACE, status);

                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }

            lastStringWasQuoted = TRUE;

            /* Read up to the closing QUOTE.  ucbuf_getc() is used directly,
               so whitespace and comment openers are kept as text. */
            for (;;) {
                c = ucbuf_getc(buf,status);

                /* EOF reached */
                if (c == U_EOF) {
                    return TOK_EOF;
                }

                /* Unterminated quoted strings */
                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }

                /* An unescaped QUOTE closes the string. */
                if (c == QUOTE && !isFollowingCharEscaped) {
                    break;
                }

                if (c == ESCAPE  && !isFollowingCharEscaped) {
                    pTarget = target;
                    c       = unescape(buf, status);

                    if (c == U_ERR) {
                        return TOK_ERROR;
                    }
                    /* The newline was produced by an escape sequence, so it
                       is not a line end in the input. */
                    if(c == CR || c == LF){
                        isNLUnescaped = TRUE;
                    }
                }               

                /* If c is still ESCAPE after unescape(), nothing is appended
                   now; the flag makes the next character literal. */
                if(c==ESCAPE && !isFollowingCharEscaped){
                    isFollowingCharEscaped = TRUE;
                }else{
                    U_APPEND_CHAR32(c, pTarget,len);
                    pTarget = target;
                    ustr_uscat(token, pTarget,len, status);
                    isFollowingCharEscaped = FALSE;
                    len=0;
                    /* Count literal line ends inside the quoted string. */
                    if(c == CR || c == LF){
                        if(isNLUnescaped == FALSE && prevC!=CR){
                            lineCount++;
                        }
                        isNLUnescaped = FALSE;
                    }
                }
                
                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
                prevC = c;
            }
        } else {
            /* Unquoted piece: always separated from what precedes it by a
               single SPACE. */
            if (token->fLength > 0) {
                ustr_ucat(token, SPACE, status);

                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }
            
            /* An unquoted piece following a quoted one means quoted and
             * unquoted strings are being mixed: warn in normal mode and
             * fail in strict mode.
             */
            if(lastStringWasQuoted){
                if(getShowWarning()){
                    warning(lineCount, "Mixing quoted and unquoted strings");
                }
                if(isStrict()){
                    return TOK_ERROR;
                }

            }

            lastStringWasQuoted = FALSE;
            
            if (c == ESCAPE) {
                pTarget = target;
                c       = unescape(buf, status);

                /* EOF reached */
                /* NOTE(review): the other unescape() call sites in this
                   function test for U_ERR rather than U_EOF -- confirm
                   which value is intended here. */
                if (c == U_EOF) {
                    return TOK_ERROR;
                }
            }

            /* Append the first character of the piece. */
            U_APPEND_CHAR32(c, pTarget,len);
            pTarget = target;
            ustr_uscat(token, pTarget,len, status);
            len=0;
            
            if (U_FAILURE(*status)) {
                return TOK_ERROR;
            }

            /* Append the remaining characters up to whitespace, a QUOTE or
               a delimiter. */
            for (;;) {
                /* DON'T skip whitespace */
                c = getNextChar(buf, FALSE, NULL, status);

                /* EOF reached */
                if (c == U_EOF) {
                    ucbuf_ungetc(c, buf);
                    return TOK_STRING;
                }

                if (U_FAILURE(*status)) {
                    return TOK_STRING;
                }

                /* These characters belong to the next token or piece, so
                   they are pushed back before leaving the loop. */
                if (c == QUOTE
                        || c == OPENBRACE
                        || c == CLOSEBRACE
                        || c == COMMA
                        || c == COLON) {
                    ucbuf_ungetc(c, buf);
                    break;
                }

                /* Whitespace ends the piece; isWhitespace() also advances
                   lineCount on a line end. */
                if (isWhitespace(c)) {
                    break;
                }

                if (c == ESCAPE) {
                    pTarget = target;
                    c       = unescape(buf, status);

                    if (c == U_ERR) {
                        return TOK_ERROR;
                    }
                }

                U_APPEND_CHAR32(c, pTarget,len);
                pTarget = target;
                ustr_uscat(token, pTarget,len, status);
                len=0;
                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }
        }

        /* Look ahead for an adjacent string to merge. */
        /* DO skip whitespace */
        c = getNextChar(buf, TRUE, NULL, status);

        if (U_FAILURE(*status)) {
            return TOK_STRING;
        }

        /* A delimiter ends the token; leave it for the next call. */
        if (c == OPENBRACE || c == CLOSEBRACE || c == COMMA || c == COLON) {
            ucbuf_ungetc(c, buf);
            return TOK_STRING;
        }
        /* NOTE(review): U_EOF is not tested here.  If getNextChar()
           returned U_EOF, the next iteration takes the unquoted branch
           and appends that value to the token before returning --
           confirm whether this is intended. */
    }
}

/* Retrieve the next character that is not part of a comment.  If
 * skipwhite is true, whitespace is skipped as well.
 *
 * Two comment forms are recognised and consumed:
 *   - slash slash      : skipped to the end of the line
 *   - slash asterisk   : skipped to the matching asterisk slash
 * When a block comment opens with a slash and TWO asterisks, its text is
 * appended to `token` (if token is not NULL) by seekUntilEndOfComment().
 *
 * buf        input buffer to read from
 * skipwhite  TRUE to discard characters for which isWhitespace() is true
 * token      destination for the text of a two-asterisk comment; may be
 *            NULL
 * status     in/out ICU error code
 *
 * Returns the character read, SLASH for a slash that does not start a
 * comment, or U_EOF at end of input or when *status already indicates
 * failure on entry.
 */
static UChar32 getNextChar(UCHARBUF* buf,
                           UBool skipwhite,
                           struct UString *token,
                           UErrorCode *status) {
    UChar32 c, c2, c3;

    if (U_FAILURE(*status)) {
        return U_EOF;
    }

    for (;;) {
        c = ucbuf_getc(buf,status);

        if (c == U_EOF) {
            return U_EOF;
        }

        /* isWhitespace() also advances lineCount on a line end, so it is
           only called here when the character is really being consumed. */
        if (skipwhite && isWhitespace(c)) {
            continue;
        }

        /* This also handles the get() failing case */
        if (c != SLASH) {
            return c;
        }

        c = ucbuf_getc(buf,status); /* character following the slash */

        if (c == U_EOF) {
            return U_EOF;
        }

        switch (c) {
        case SLASH:  /* line comment */
            seekUntilNewline(buf, NULL, status);
            break;

        case ASTERISK:  /* block comment */
            c2 = ucbuf_getc(buf, status); /* character after the opener */
            if(c2 == ASTERISK){
                /* Two asterisks.  This is either the start of a comment
                   whose text is stored in token, or -- when a slash
                   follows immediately -- a complete, empty comment.
                   Without this check the second asterisk would be consumed
                   as a marker, the terminator would never be recognised,
                   and the scan would swallow input up to the next comment
                   terminator or report an unterminated comment. */
                c3 = ucbuf_getc(buf, status);
                if (c3 == SLASH) {
                    break; /* empty comment fully consumed */
                }
                ucbuf_ungetc(c3, buf); /* first character of the comment text */
                /* parse multi-line comment and store it in token*/
                seekUntilEndOfComment(buf, token, status);
            } else {
                /* c2 is the first character of the comment body; put it
                   back so seekUntilEndOfComment() sees it. */
                ucbuf_ungetc(c2, buf);
                seekUntilEndOfComment(buf, NULL, status);
            }
            break;

        default:
            ucbuf_ungetc(c, buf); /* not a comment - put back the c */
            /* If get() failed this is a NOP */
            return SLASH;
        }

    }
}

/* Consume characters from buf through the end of the current line.
 *
 * Reading stops after the first character for which isNewline() is true,
 * at U_EOF, or as soon as *status is no longer U_ZERO_ERROR.  When token
 * is not NULL every character read -- including the one that stops the
 * loop -- is appended to it.  Does nothing if *status already indicates
 * failure.
 */
static void seekUntilNewline(UCHARBUF* buf,
                             struct UString *token,
                             UErrorCode *status) {
    UChar32 ch;

    if (U_FAILURE(*status)) {
        return;
    }

    for (;;) {
        ch = ucbuf_getc(buf, status);

        /* add the char to token */
        if (token != NULL) {
            ustr_u32cat(token, ch, status);
        }

        /* isNewline() is evaluated first on every pass because it is also
           what advances lineCount. */
        if (isNewline(ch)) {
            break;
        }
        if (ch == U_EOF) {
            break;
        }
        if (*status != U_ZERO_ERROR) {
            break;
        }
    }
}

/* Consume a block comment body from buf, up to and including the closing
 * asterisk-slash pair.
 *
 * When token is not NULL, every character read before the terminator is
 * appended to it.  lineCount is advanced (through isNewline()) for line
 * ends inside the comment.  If the input ends before the terminator,
 * *status is set to U_INVALID_FORMAT_ERROR and an error is reported
 * against the line on which the scan started.  Does nothing if *status
 * already indicates failure.
 */
static void seekUntilEndOfComment(UCHARBUF *buf,
                                  struct UString *token,
                                  UErrorCode *status) {
    UChar32  cur, following;
    uint32_t startLine;

    if (U_FAILURE(*status)) {
        return;
    }

    /* Remembered for the diagnostic; lineCount moves on while scanning. */
    startLine = lineCount;

    for (;;) {
        cur = ucbuf_getc(buf, status);

        if (cur == ASTERISK) {
            /* Peek at the next character: a slash closes the comment,
               anything else is put back and the asterisk is ordinary
               comment text. */
            following = ucbuf_getc(buf, status);

            if (following == SLASH) {
                break;
            }
            ucbuf_ungetc(following, buf);
        }

        /* add the char to token */
        if (token != NULL) {
            ustr_u32cat(token, cur, status);
        }

        /* Called only for its side effect of incrementing lineCount. */
        isNewline(cur);

        if (cur == U_EOF || *status != U_ZERO_ERROR) {
            break;
        }
    }

    if (cur == U_EOF) {
        *status = U_INVALID_FORMAT_ERROR;
        error(startLine, "unterminated comment detected");
    }
}

/* Decode the escape sequence whose leading ESCAPE character has just been
 * read from buf.
 *
 * The ESCAPE is pushed back first, because ucbuf_getcx32() expects to read
 * the sequence from its very beginning.  Returns the value produced by
 * ucbuf_getcx32(), or U_EOF without touching buf when *status already
 * indicates failure.
 */
UChar32 unescape(UCHARBUF *buf,
                 UErrorCode *status) {
    UChar32 decoded = U_EOF;

    if (!U_FAILURE(*status)) {
        ucbuf_ungetc(ESCAPE, buf);
        decoded = ucbuf_getcx32(buf, status);
    }

    return decoded;
}

/* Report whether c is one of the whitespace characters recognised by the
 * tokenizer: U+000A, U+2029, U+000D, U+0020, U+0009 or U+FEFF.
 *
 * Side effect: lineCount is incremented when c is U+000A or U+2029, so
 * the function must be called exactly once per character consumed.
 * U+000D does not increment the count.
 */
static UBool isWhitespace(UChar32 c) {
    /* Line terminators that advance the line counter. */
    if (c == 0x000A || c == 0x2029) {
        lineCount++;
        return TRUE;
    }

    /* '\r', ' ', '\t' and U+FEFF: whitespace without a line increment. */
    if (c == 0x000D || c == 0x0020 || c == 0x0009 || c == 0xFEFF) {
        return TRUE;
    }

    return FALSE;
}

/* Report whether c is a line-end character: U+000A, U+2029 or U+000D.
 *
 * Side effect: lineCount is incremented when c is U+000A or U+2029.
 * U+000D is reported as a newline but does not increment the count.
 */
static UBool isNewline(UChar32 c) {
    UBool endsLine = FALSE;

    if (c == 0x000A || c == 0x2029) {
        lineCount++;
        endsLine = TRUE;
    } else if (c == 0x000D) {
        endsLine = TRUE;
    }

    return endsLine;
}
Commit	Line	Data
b75a7d8f A	1	/*
	2	*******************************************************************************
	3	*
46f4442e	4	* Copyright (C) 1998-2008, International Business Machines
b75a7d8f A	5	* Corporation and others. All Rights Reserved.
	6	*
	7	*******************************************************************************
	8	*
	9	* File read.c
	10	*
	11	* Modification History:
	12	*
	13	* Date Name Description
	14	* 05/26/99 stephen Creation.
	15	* 5/10/01 Ram removed ustdio dependency
	16	*******************************************************************************
	17	*/
	18
	19	#include "read.h"
	20	#include "errmsg.h"
	21	#include "unicode/ustring.h"
	22
	23	#define OPENBRACE 0x007B
	24	#define CLOSEBRACE 0x007D
	25	#define COMMA 0x002C
	26	#define QUOTE 0x0022
	27	#define ESCAPE 0x005C
	28	#define SLASH 0x002F
	29	#define ASTERISK 0x002A
	30	#define SPACE 0x0020
	31	#define COLON 0x003A
	32	#define BADBOM 0xFFFE
374ca955 A	33	#define CR 0x000D
	34	#define LF 0x000A
	35
b75a7d8f A	36	static int32_t lineCount;
	37
	38	/* Protos */
	39	static enum ETokenType getStringToken(UCHARBUF *buf,
	40	UChar32 initialChar,
	41	struct UString *token,
	42	UErrorCode *status);
	43
374ca955 A	44	static UChar32 getNextChar (UCHARBUF buf, UBool skipwhite, struct UString token, UErrorCode *status);
	45	static void seekUntilNewline (UCHARBUF buf, struct UString token, UErrorCode *status);
	46	static void seekUntilEndOfComment (UCHARBUF buf, struct UString token, UErrorCode *status);
b75a7d8f A	47	static UBool isWhitespace (UChar32 c);
	48	static UBool isNewline (UChar32 c);
	49
	50	void resetLineNumber() {
	51	lineCount = 1;
	52	}
	53
	54	/* Read and return the next token from the stream. If the token is of
	55	type eString, fill in the token parameter with the token. If the
	56	token is eError, then the status parameter will contain the
	57	specific error. This will be eItemNotFound at the end of file,
	58	indicating that all tokens have been returned. This method will
	59	never return eString twice in a row; instead, multiple adjacent
	60	string tokens will be merged into one, with no intervening
	61	space. */
	62	enum ETokenType getNextToken(UCHARBUF* buf,
	63	struct UString *token,
	64	uint32_t linenumber, / out: linenumber of token */
374ca955	65	struct UString *comment,
b75a7d8f A	66	UErrorCode *status) {
	67	enum ETokenType result;
	68	UChar32 c;
	69
	70	if (U_FAILURE(*status)) {
	71	return TOK_ERROR;
	72	}
	73
	74	/* Skip whitespace */
374ca955	75	c = getNextChar(buf, TRUE, comment, status);
b75a7d8f A	76
	77	if (U_FAILURE(*status)) {
	78	return TOK_ERROR;
	79	}
	80
	81	*linenumber = lineCount;
	82
	83	switch(c) {
	84	case BADBOM:
	85	return TOK_ERROR;
	86	case OPENBRACE:
	87	return TOK_OPEN_BRACE;
	88	case CLOSEBRACE:
	89	return TOK_CLOSE_BRACE;
	90	case COMMA:
	91	return TOK_COMMA;
	92	case U_EOF:
	93	return TOK_EOF;
	94	case COLON:
	95	return TOK_COLON;
	96
	97	default:
	98	result = getStringToken(buf, c, token, status);
	99	}
	100
	101	*linenumber = lineCount;
	102	return result;
	103	}
	104
	105	/* Copy a string token into the given UnicodeString. Upon entry, we
	106	have already read the first character of the string token, which is
	107	not a whitespace character (but may be a QUOTE or ESCAPE). This
	108	function reads all subsequent characters that belong with this
	109	string, and copy them into the token parameter. The other
	110	important, and slightly convoluted purpose of this function is to
	111	merge adjacent strings. It looks forward a bit, and if the next
	112	non comment, non whitespace item is a string, it reads it in as
	113	well. If two adjacent strings are quoted, they are merged without
	114	intervening space. Otherwise a single SPACE character is
	115	inserted. */
	116	static enum ETokenType getStringToken(UCHARBUF* buf,
	117	UChar32 initialChar,
	118	struct UString *token,
	119	UErrorCode *status) {
	120	UBool lastStringWasQuoted;
	121	UChar32 c;
	122	UChar target[3] = { '\0' };
	123	UChar *pTarget = target;
	124	int len=0;
	125	UBool isFollowingCharEscaped=FALSE;
374ca955 A	126	UBool isNLUnescaped = FALSE;
374ca955 A	127	UChar32 prevC=0;
b75a7d8f A	128
	129	/* We are guaranteed on entry that initialChar is not a whitespace
	130	character. If we are at the EOF, or have some other problem, it
	131	doesn't matter; we still want to validly return the initialChar
	132	(if nothing else) as a string token. */
	133
	134	if (U_FAILURE(*status)) {
	135	return TOK_ERROR;
	136	}
	137
	138	/* setup */
	139	lastStringWasQuoted = FALSE;
	140	c = initialChar;
	141	ustr_setlen(token, 0, status);
	142
	143	if (U_FAILURE(*status)) {
	144	return TOK_ERROR;
	145	}
	146
	147	for (;;) {
	148	if (c == QUOTE) {
	149	if (!lastStringWasQuoted && token->fLength > 0) {
	150	ustr_ucat(token, SPACE, status);
	151
	152	if (U_FAILURE(*status)) {
	153	return TOK_ERROR;
	154	}
	155	}
	156
	157	lastStringWasQuoted = TRUE;
	158
	159	for (;;) {
	160	c = ucbuf_getc(buf,status);
	161
	162	/* EOF reached */
	163	if (c == U_EOF) {
	164	return TOK_EOF;
	165	}
	166
	167	/* Unterminated quoted strings */
	168	if (U_FAILURE(*status)) {
	169	return TOK_ERROR;
	170	}
	171
	172	if (c == QUOTE && !isFollowingCharEscaped) {
	173	break;
	174	}
	175
	176	if (c == ESCAPE && !isFollowingCharEscaped) {
	177	pTarget = target;
	178	c = unescape(buf, status);
	179
	180	if (c == U_ERR) {
	181	return TOK_ERROR;
	182	}
374ca955 A	183	if(c == CR \|\| c == LF){
	184	isNLUnescaped = TRUE;
	185	}
b75a7d8f A	186	}
	187
	188	if(c==ESCAPE && !isFollowingCharEscaped){
	189	isFollowingCharEscaped = TRUE;
	190	}else{
	191	U_APPEND_CHAR32(c, pTarget,len);
	192	pTarget = target;
	193	ustr_uscat(token, pTarget,len, status);
	194	isFollowingCharEscaped = FALSE;
	195	len=0;
374ca955 A	196	if(c == CR \|\| c == LF){
	197	if(isNLUnescaped == FALSE && prevC!=CR){
	198	lineCount++;
	199	}
	200	isNLUnescaped = FALSE;
	201	}
b75a7d8f A	202	}
	203
	204	if (U_FAILURE(*status)) {
	205	return TOK_ERROR;
	206	}
374ca955	207	prevC = c;
b75a7d8f A	208	}
	209	} else {
	210	if (token->fLength > 0) {
	211	ustr_ucat(token, SPACE, status);
	212
	213	if (U_FAILURE(*status)) {
	214	return TOK_ERROR;
	215	}
	216	}
	217
	218	if(lastStringWasQuoted){
	219	if(getShowWarning()){
	220	warning(lineCount, "Mixing quoted and unquoted strings");
	221	}
	222	if(isStrict()){
	223	return TOK_ERROR;
	224	}
	225
	226	}
	227
	228	lastStringWasQuoted = FALSE;
	229
	230	/* if we reach here we are mixing
	231	* quoted and unquoted strings
	232	* warn in normal mode and error in
	233	* pedantic mode
	234	*/
	235
	236	if (c == ESCAPE) {
	237	pTarget = target;
	238	c = unescape(buf, status);
	239
	240	/* EOF reached */
	241	if (c == U_EOF) {
	242	return TOK_ERROR;
	243	}
	244	}
	245
	246	U_APPEND_CHAR32(c, pTarget,len);
	247	pTarget = target;
	248	ustr_uscat(token, pTarget,len, status);
	249	len=0;
374ca955	250
b75a7d8f A	251	if (U_FAILURE(*status)) {
	252	return TOK_ERROR;
	253	}
	254
	255	for (;;) {
	256	/* DON'T skip whitespace */
374ca955	257	c = getNextChar(buf, FALSE, NULL, status);
b75a7d8f A	258
	259	/* EOF reached */
	260	if (c == U_EOF) {
	261	ucbuf_ungetc(c, buf);
	262	return TOK_STRING;
	263	}
	264
	265	if (U_FAILURE(*status)) {
	266	return TOK_STRING;
	267	}
	268
	269	if (c == QUOTE
	270	\|\| c == OPENBRACE
	271	\|\| c == CLOSEBRACE
	272	\|\| c == COMMA
	273	\|\| c == COLON) {
	274	ucbuf_ungetc(c, buf);
	275	break;
	276	}
	277
	278	if (isWhitespace(c)) {
	279	break;
	280	}
	281
	282	if (c == ESCAPE) {
	283	pTarget = target;
	284	c = unescape(buf, status);
	285
	286	if (c == U_ERR) {
	287	return TOK_ERROR;
	288	}
	289	}
	290
	291	U_APPEND_CHAR32(c, pTarget,len);
	292	pTarget = target;
	293	ustr_uscat(token, pTarget,len, status);
	294	len=0;
	295	if (U_FAILURE(*status)) {
	296	return TOK_ERROR;
	297	}
	298	}
	299	}
	300
	301	/* DO skip whitespace */
374ca955	302	c = getNextChar(buf, TRUE, NULL, status);
b75a7d8f A	303
	304	if (U_FAILURE(*status)) {
	305	return TOK_STRING;
	306	}
	307
	308	if (c == OPENBRACE \|\| c == CLOSEBRACE \|\| c == COMMA \|\| c == COLON) {
	309	ucbuf_ungetc(c, buf);
	310	return TOK_STRING;
	311	}
	312	}
	313	}
	314
374ca955	315	/* Retrieve the next character. If skipwhite is
b75a7d8f A	316	true, whitespace is skipped as well. */
	317	static UChar32 getNextChar(UCHARBUF* buf,
	318	UBool skipwhite,
374ca955	319	struct UString *token,
b75a7d8f	320	UErrorCode *status) {
374ca955	321	UChar32 c, c2;
b75a7d8f A	322
	323	if (U_FAILURE(*status)) {
	324	return U_EOF;
	325	}
	326
	327	for (;;) {
	328	c = ucbuf_getc(buf,status);
	329
	330	if (c == U_EOF) {
	331	return U_EOF;
	332	}
	333
	334	if (skipwhite && isWhitespace(c)) {
	335	continue;
	336	}
	337
	338	/* This also handles the get() failing case */
	339	if (c != SLASH) {
	340	return c;
	341	}
	342
46f4442e	343	c = ucbuf_getc(buf,status); /* "/c" */
b75a7d8f A	344
	345	if (c == U_EOF) {
	346	return U_EOF;
	347	}
	348
	349	switch (c) {
46f4442e	350	case SLASH: /* "//" */
374ca955	351	seekUntilNewline(buf, NULL, status);
b75a7d8f A	352	break;
b75a7d8f A	353
46f4442e A	354	case ASTERISK: /* "/" /
	355	c2 = ucbuf_getc(buf, status); /* "/c" /
	356	if(c2 == ASTERISK){ /* "/*" /
374ca955 A	357	/* parse multi-line comment and store it in token*/
374ca955 A	358	seekUntilEndOfComment(buf, token, status);
46f4442e A	359	} else {
46f4442e A	360	ucbuf_ungetc(c2, buf); /* c2 is the non-asterisk following "/". Include c2 back in buffer. /
374ca955 A	361	seekUntilEndOfComment(buf, NULL, status);
374ca955 A	362	}
b75a7d8f A	363	break;
	364
	365	default:
46f4442e	366	ucbuf_ungetc(c, buf); /* "/c" - put back the c */
b75a7d8f A	367	/* If get() failed this is a NOP */
	368	return SLASH;
	369	}
374ca955	370
b75a7d8f A	371	}
	372	}
	373
	374	static void seekUntilNewline(UCHARBUF* buf,
374ca955	375	struct UString *token,
b75a7d8f A	376	UErrorCode *status) {
	377	UChar32 c;
	378
	379	if (U_FAILURE(*status)) {
	380	return;
	381	}
	382
	383	do {
	384	c = ucbuf_getc(buf,status);
374ca955 A	385	/* add the char to token */
	386	if(token!=NULL){
	387	ustr_u32cat(token, c, status);
	388	}
b75a7d8f A	389	} while (!isNewline(c) && c != U_EOF && *status == U_ZERO_ERROR);
	390	}
	391
	392	static void seekUntilEndOfComment(UCHARBUF *buf,
374ca955	393	struct UString *token,
b75a7d8f A	394	UErrorCode *status) {
	395	UChar32 c, d;
	396	uint32_t line;
	397
	398	if (U_FAILURE(*status)) {
	399	return;
	400	}
	401
	402	line = lineCount;
	403
	404	do {
	405	c = ucbuf_getc(buf, status);
	406
	407	if (c == ASTERISK) {
	408	d = ucbuf_getc(buf, status);
	409
	410	if (d != SLASH) {
	411	ucbuf_ungetc(d, buf);
	412	} else {
	413	break;
	414	}
	415	}
374ca955 A	416	/* add the char to token */
	417	if(token!=NULL){
	418	ustr_u32cat(token, c, status);
	419	}
	420	/* increment the lineCount */
	421	isNewline(c);
	422
b75a7d8f A	423	} while (c != U_EOF && *status == U_ZERO_ERROR);
	424
	425	if (c == U_EOF) {
	426	*status = U_INVALID_FORMAT_ERROR;
	427	error(line, "unterminated comment detected");
	428	}
	429	}
	430
	431	UChar32 unescape(UCHARBUF *buf,
	432	UErrorCode *status) {
	433	if (U_FAILURE(*status)) {
	434	return U_EOF;
	435	}
	436
	437	/* We expect to be called after the ESCAPE has been seen, but
	438	* u_fgetcx needs an ESCAPE to do its magic. */
	439	ucbuf_ungetc(ESCAPE, buf);
	440
	441	return ucbuf_getcx32(buf, status);
	442	}
	443
	444	static UBool isWhitespace(UChar32 c) {
	445	switch (c) {
	446	/* ' ', '\t', '\n', '\r', 0x2029, 0xFEFF */
	447	case 0x000A:
	448	case 0x2029:
	449	lineCount++;
	450	case 0x000D:
	451	case 0x0020:
	452	case 0x0009:
	453	case 0xFEFF:
	454	return TRUE;
	455
	456	default:
	457	return FALSE;
	458	}
	459	}
	460
	461	static UBool isNewline(UChar32 c) {
	462	switch (c) {
	463	/* '\n', '\r', 0x2029 */
	464	case 0x000A:
	465	case 0x2029:
	466	lineCount++;
	467	case 0x000D:
	468	return TRUE;
	469
	470	default:
	471	return FALSE;
	472	}
	473	}