[apple/icu.git] / icuSources / tools / genrb / read.c

/*
*******************************************************************************
*
*   Copyright (C) 1998-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File read.c
*
* Modification History:
*
*   Date        Name        Description
*   05/26/99    stephen     Creation.
*   5/10/01     Ram         removed ustdio dependency
*******************************************************************************
*/

#include "read.h"
#include "errmsg.h"
#include "unicode/ustring.h"

/* Code points the tokenizer treats specially. */
#define OPENBRACE    0x007B
#define CLOSEBRACE   0x007D
#define COMMA        0x002C
#define QUOTE        0x0022
#define ESCAPE       0x005C
#define SLASH        0x002F
#define ASTERISK     0x002A
#define SPACE        0x0020
#define COLON        0x003A
/* getNextToken() returns TOK_ERROR when a token starts with this value
   (presumably a byte-swapped U+FEFF byte order mark -- TODO confirm). */
#define BADBOM       0xFFFE
#define CR           0x000D
#define LF           0x000A
               
/* Current line number of the input. resetLineNumber() sets it to 1; it is
   advanced by isWhitespace(), isNewline() and, for line breaks inside
   quoted strings, by getStringToken(). */
static int32_t lineCount;

/* Protos */
static enum ETokenType getStringToken(UCHARBUF *buf,
                                      UChar32 initialChar,
                                      struct UString *token,
                                      UErrorCode *status);

static UChar32 getNextChar           (UCHARBUF *buf, UBool skipwhite, struct UString *token, UErrorCode *status);
static void    seekUntilNewline      (UCHARBUF *buf, struct UString *token, UErrorCode *status);
static void    seekUntilEndOfComment (UCHARBUF *buf, struct UString *token, UErrorCode *status);
/* NOTE: both predicates below also increment lineCount for 0x000A and 0x2029. */
static UBool   isWhitespace          (UChar32 c);
static UBool   isNewline             (UChar32 c);

/* Restart line counting: the next character read is considered to be on
   line 1.  Declared with an explicit (void) parameter list so that the
   definition is a real prototype and stray arguments are diagnosed. */
U_CFUNC void resetLineNumber(void) {
    lineCount = 1;
}

/* Read and return the next token from buf.
 *
 *   token      - receives the text when the result comes from
 *                getStringToken() (normally TOK_STRING).
 *   linenumber - out: value of lineCount for the token.  It is written once
 *                after leading whitespace/comments have been skipped and,
 *                for string tokens, again after the whole string was read.
 *   comment    - passed to getNextChar(), which hands it to
 *                seekUntilEndOfComment() for comments opened with
 *                slash-star-star.
 *   status     - in/out ICU error code.
 *
 * Returns TOK_ERROR if *status is a failure on entry or after skipping
 * whitespace, TOK_EOF at end of input, a punctuation token for
 * '{', '}', ',' and ':', and otherwise whatever getStringToken() returns.
 * Adjacent string pieces are merged into a single token by
 * getStringToken(), so two string tokens are never returned in a row.
 *
 * NOTE: a leading BADBOM yields TOK_ERROR without *status being changed
 * in this function.
 */
U_CFUNC enum ETokenType
getNextToken(UCHARBUF* buf,
             struct UString *token,
             uint32_t *linenumber, /* out: linenumber of token */
             struct UString *comment,
             UErrorCode *status) {
    enum ETokenType kind;
    UChar32         first;

    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    /* First significant character: whitespace and comments are skipped. */
    first = getNextChar(buf, TRUE, comment, status);
    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    *linenumber = lineCount;

    /* Single-character tokens and terminal conditions. */
    if (first == BADBOM) {
        return TOK_ERROR;
    }
    if (first == OPENBRACE) {
        return TOK_OPEN_BRACE;
    }
    if (first == CLOSEBRACE) {
        return TOK_CLOSE_BRACE;
    }
    if (first == COMMA) {
        return TOK_COMMA;
    }
    if (first == U_EOF) {
        return TOK_EOF;
    }
    if (first == COLON) {
        return TOK_COLON;
    }

    /* Anything else starts a string; it may span several lines, so the
       line number is refreshed once the string has been consumed. */
    kind = getStringToken(buf, first, token, status);
    *linenumber = lineCount;
    return kind;
}

/* Build one string token in `token`, starting with initialChar, which the
   caller has already read and which is not whitespace (it may be a QUOTE
   or an ESCAPE).

   The function keeps consuming adjacent string pieces and concatenates
   them into the same token:
     - a quoted piece that follows another quoted piece is appended
       directly, with no separator;
     - in every other case a single SPACE is inserted between pieces.

   Returns:
     TOK_STRING - the next non-whitespace, non-comment character is
                  OPENBRACE, CLOSEBRACE, COMMA or COLON (it is pushed
                  back), or end of input / a failure was met after or
                  while reading a piece.
     TOK_ERROR  - *status was already a failure, appending to the token
                  failed, unescape() reported a problem, or an unquoted
                  piece follows a quoted one while isStrict() is true.
     TOK_EOF    - end of input was reached inside a quoted piece, i.e. the
                  closing QUOTE is missing.

   Side effect: lineCount is advanced for line breaks found inside quoted
   pieces and, through getNextChar()/isWhitespace(), between pieces. */
static enum ETokenType getStringToken(UCHARBUF* buf,
                                      UChar32 initialChar,
                                      struct UString *token,
                                      UErrorCode *status) {
    UBool    lastStringWasQuoted;
    UChar32  c;
    /* Scratch buffer handed to U_APPEND_CHAR32 for the current character
       (presumably it stores the UTF-16 form and sets len -- see ustr.h). */
    UChar    target[3] = { '\0' };
    UChar    *pTarget   = target;
    int      len=0;
    /* TRUE when the previous character was an ESCAPE that is kept pending. */
    UBool    isFollowingCharEscaped=FALSE;
    /* TRUE when the current CR/LF came out of unescape() rather than
       straight from the input, so it must not bump lineCount. */
    UBool    isNLUnescaped = FALSE;
    UChar32  prevC=0;

    /* We are guaranteed on entry that initialChar is not a whitespace
       character. If we are at the EOF, or have some other problem, it
       doesn't matter; we still want to validly return the initialChar
       (if nothing else) as a string token. */

    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    /* setup */
    lastStringWasQuoted = FALSE;
    c = initialChar;
    ustr_setlen(token, 0, status);

    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    for (;;) {
        if (c == QUOTE) {
            /* Quoted piece.  Separate it from a preceding unquoted piece
               with one SPACE; quoted-after-quoted is joined directly. */
            if (!lastStringWasQuoted && token->fLength > 0) {
                ustr_ucat(token, SPACE, status);

                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }

            lastStringWasQuoted = TRUE;

            for (;;) {
                c = ucbuf_getc(buf,status);

                /* EOF reached before the closing QUOTE */
                if (c == U_EOF) {
                    return TOK_EOF;
                }

                /* Read failure inside the quoted string */
                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }

                /* An unescaped QUOTE closes the piece. */
                if (c == QUOTE && !isFollowingCharEscaped) {
                    break;
                }

                if (c == ESCAPE  && !isFollowingCharEscaped) {
                    pTarget = target;
                    c       = unescape(buf, status);

                    if (c == U_ERR) {
                        return TOK_ERROR;
                    }
                    if(c == CR || c == LF){
                        isNLUnescaped = TRUE;
                    }
                }

                /* If unescape() handed the ESCAPE back unchanged, remember
                   it and let the next character be taken literally. */
                if(c==ESCAPE && !isFollowingCharEscaped){
                    isFollowingCharEscaped = TRUE;
                }else{
                    U_APPEND_CHAR32(c, pTarget,len);
                    pTarget = target;
                    ustr_uscat(token, pTarget,len, status);
                    isFollowingCharEscaped = FALSE;
                    len=0;
                    if(c == CR || c == LF){
                        /* Count a physical line break once: not for
                           escaped newlines, and not for the character
                           that directly follows a CR. */
                        if(isNLUnescaped == FALSE && prevC!=CR){
                            lineCount++;
                        }
                        isNLUnescaped = FALSE;
                    }
                }

                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
                prevC = c;
            }
        } else {
            /* Unquoted piece: always separated from earlier text by SPACE. */
            if (token->fLength > 0) {
                ustr_ucat(token, SPACE, status);

                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }

            /* An unquoted piece after a quoted one means quoted and
             * unquoted strings are being mixed: warn in normal mode and
             * fail in strict mode.
             */
            if(lastStringWasQuoted){
                if(getShowWarning()){
                    warning(lineCount, "Mixing quoted and unquoted strings");
                }
                if(isStrict()){
                    return TOK_ERROR;
                }

            }

            lastStringWasQuoted = FALSE;

            if (c == ESCAPE) {
                pTarget = target;
                c       = unescape(buf, status);

                /* EOF reached */
                /* NOTE(review): the other unescape() call sites compare
                   against U_ERR rather than U_EOF -- confirm which is
                   intended here. */
                if (c == U_EOF) {
                    return TOK_ERROR;
                }
            }

            U_APPEND_CHAR32(c, pTarget,len);
            pTarget = target;
            ustr_uscat(token, pTarget,len, status);
            len=0;

            if (U_FAILURE(*status)) {
                return TOK_ERROR;
            }

            for (;;) {
                /* DON'T skip whitespace */
                c = getNextChar(buf, FALSE, NULL, status);

                /* EOF reached */
                if (c == U_EOF) {
                    ucbuf_ungetc(c, buf);
                    return TOK_STRING;
                }

                if (U_FAILURE(*status)) {
                    return TOK_STRING;
                }

                /* Structural characters and QUOTE end the piece and are
                   left in the buffer for the caller / next iteration. */
                if (c == QUOTE
                        || c == OPENBRACE
                        || c == CLOSEBRACE
                        || c == COMMA
                        || c == COLON) {
                    ucbuf_ungetc(c, buf);
                    break;
                }

                /* Whitespace ends the piece and is consumed. */
                if (isWhitespace(c)) {
                    break;
                }

                if (c == ESCAPE) {
                    pTarget = target;
                    c       = unescape(buf, status);

                    if (c == U_ERR) {
                        return TOK_ERROR;
                    }
                }

                U_APPEND_CHAR32(c, pTarget,len);
                pTarget = target;
                ustr_uscat(token, pTarget,len, status);
                len=0;
                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }
        }

        /* DO skip whitespace */
        c = getNextChar(buf, TRUE, NULL, status);

        if (U_FAILURE(*status)) {
            return TOK_STRING;
        }

        /* End of input also terminates the token.  Without this check the
           EOF sentinel would be taken as the first character of another
           unquoted piece, and a SPACE plus a bogus code unit would be
           appended to the token. */
        if (c == U_EOF
                || c == OPENBRACE || c == CLOSEBRACE || c == COMMA || c == COLON) {
            ucbuf_ungetc(c, buf);
            return TOK_STRING;
        }
    }
}

/* Retrieve the next character that is not part of a comment.
 *
 *   skipwhite - when TRUE, characters accepted by isWhitespace() are
 *               skipped as well (isWhitespace() also maintains lineCount).
 *   token     - may be NULL; passed to seekUntilEndOfComment() for block
 *               comments opened with slash-star-star so their text can be
 *               collected.  Other comments are discarded.
 *   status    - in/out ICU error code.
 *
 * Returns U_EOF if *status is already a failure or the input ends, SLASH
 * when a slash is not the start of a comment (the following character is
 * pushed back), and otherwise the character read.
 */
static UChar32 getNextChar(UCHARBUF* buf,
                           UBool skipwhite,
                           struct UString *token,
                           UErrorCode *status) {
    UChar32 c, c2, c3;

    if (U_FAILURE(*status)) {
        return U_EOF;
    }

    for (;;) {
        c = ucbuf_getc(buf,status);

        if (c == U_EOF) {
            return U_EOF;
        }

        if (skipwhite && isWhitespace(c)) {
            continue;
        }

        /* This also handles the get() failing case */
        if (c != SLASH) {
            return c;
        }

        c = ucbuf_getc(buf,status); /* "/c" */

        if (c == U_EOF) {
            return U_EOF;
        }

        switch (c) {
        case SLASH:  /* "//" */
            seekUntilNewline(buf, NULL, status);
            break;

        case ASTERISK:  /* " / * " */
            c2 = ucbuf_getc(buf, status); /* "/ * c" */
            if(c2 == ASTERISK){  /* "/ * *" */
                c3 = ucbuf_getc(buf, status);
                if (c3 == SLASH) {
                    /* Empty block comment (slash, two asterisks, slash):
                       the second asterisk is the start of the terminator,
                       not the start of a documentation comment.  Nothing
                       is left to skip. */
                    break;
                }
                ucbuf_ungetc(c3, buf); /* not a terminator: c3 belongs to the comment body */
                /* parse multi-line comment and store it in token*/
                seekUntilEndOfComment(buf, token, status);
            } else {
                ucbuf_ungetc(c2, buf); /* c2 is the non-asterisk following "/ *".  Include c2  back in buffer.  */
                seekUntilEndOfComment(buf, NULL, status);
            }
            break;

        default:
            ucbuf_ungetc(c, buf); /* "/c" - put back the c */
            /* If get() failed this is a NOP */
            return SLASH;
        }

    }
}

/* Consume characters up to and including the next line terminator
 * accepted by isNewline() (which also maintains lineCount), or until end
 * of input or a failure.
 *
 *   token  - may be NULL; when non-NULL every character read, including
 *            the terminator, is appended to it.  The U_EOF sentinel is
 *            never appended because it is not a character.
 *   status - in/out ICU error code; nothing is read if it is already a
 *            failure.  A warning status does not stop the scan.
 */
static void seekUntilNewline(UCHARBUF* buf,
                             struct UString *token,
                             UErrorCode *status) {
    UChar32 c;

    if (U_FAILURE(*status)) {
        return;
    }

    do {
        c = ucbuf_getc(buf,status);
        /* add the char to token */
        if(token!=NULL && c != U_EOF){
            ustr_u32cat(token, c, status);
        }
        /* U_SUCCESS rather than "== U_ZERO_ERROR": warnings are not
           failures, matching the U_FAILURE guard at the top. */
    } while (!isNewline(c) && c != U_EOF && U_SUCCESS(*status));
}

/* Consume a block comment body up to and including its asterisk-slash
 * terminator.  The opening sequence has already been read by the caller.
 *
 *   token  - may be NULL; when non-NULL every character of the body is
 *            appended to it (the terminator and the U_EOF sentinel are
 *            not).
 *   status - in/out ICU error code; nothing is read if it is already a
 *            failure.  A warning status does not stop the scan.  If the
 *            input ends before the terminator, *status is set to
 *            U_INVALID_FORMAT_ERROR and an error is reported against the
 *            line on which the scan started.
 *
 * lineCount is kept up to date through isNewline().
 */
static void seekUntilEndOfComment(UCHARBUF *buf,
                                  struct UString *token,
                                  UErrorCode *status) {
    UChar32  c, d;
    uint32_t line;

    if (U_FAILURE(*status)) {
        return;
    }

    /* Remember where the comment began for the diagnostic below. */
    line = lineCount;

    do {
        c = ucbuf_getc(buf, status);

        if (c == U_EOF) {
            /* Unterminated comment; do not store the sentinel in token. */
            break;
        }

        if (c == ASTERISK) {
            d = ucbuf_getc(buf, status);

            if (d != SLASH) {
                ucbuf_ungetc(d, buf);
            } else {
                break;
            }
        }
        /* add the char to token */
        if(token!=NULL){
            ustr_u32cat(token, c, status);
        }
        /* increment the lineCount */
        isNewline(c);

        /* U_SUCCESS rather than "== U_ZERO_ERROR": warnings are not
           failures, matching the U_FAILURE guard at the top. */
    } while (U_SUCCESS(*status));

    if (c == U_EOF) {
        *status = U_INVALID_FORMAT_ERROR;
        error(line, "unterminated comment detected");
    }
}

/* Decode the escape sequence whose leading ESCAPE has just been consumed.
 *
 * The ESCAPE is pushed back first because the decoding is delegated to
 * ucbuf_getcx32(), which is given the buffer positioned at the ESCAPE.
 *
 * Returns U_EOF without touching buf when *status is already a failure;
 * otherwise returns the value produced by ucbuf_getcx32().
 */
U_CFUNC UChar32 unescape(UCHARBUF *buf, UErrorCode *status) {
    UChar32 decoded = U_EOF;

    if (!U_FAILURE(*status)) {
        ucbuf_ungetc(ESCAPE, buf);
        decoded = ucbuf_getcx32(buf, status);
    }

    return decoded;
}

/* Tell whether c is one of the whitespace characters recognized by the
 * tokenizer: LF, U+2029, CR, SPACE, TAB or U+FEFF.
 *
 * Side effect: LF and U+2029 also advance lineCount.  CR does not, so a
 * CR LF pair counts as a single line.
 */
static UBool isWhitespace(UChar32 c) {
    if (c == LF || c == 0x2029) {
        /* line terminators: count the line, then report whitespace */
        lineCount++;
        return TRUE;
    }

    if (c == CR || c == SPACE || c == 0x0009 || c == 0xFEFF) {
        return TRUE;
    }

    return FALSE;
}

/* Tell whether c is a line terminator: LF, U+2029 or CR.
 *
 * Side effect: LF and U+2029 also advance lineCount.  CR does not, so a
 * CR LF pair counts as a single line.
 */
static UBool isNewline(UChar32 c) {
    if (c == LF || c == 0x2029) {
        lineCount++;
        return TRUE;
    }

    return (c == CR) ? TRUE : FALSE;
}
Commit	Line	Data
b75a7d8f A	1	/*
	2	*******************************************************************************
	3	*
4388f060	4	* Copyright (C) 1998-2011, International Business Machines
b75a7d8f A	5	* Corporation and others. All Rights Reserved.
	6	*
	7	*******************************************************************************
	8	*
	9	* File read.c
	10	*
	11	* Modification History:
	12	*
	13	* Date Name Description
	14	* 05/26/99 stephen Creation.
	15	* 5/10/01 Ram removed ustdio dependency
	16	*******************************************************************************
	17	*/
	18
	19	#include "read.h"
	20	#include "errmsg.h"
	21	#include "unicode/ustring.h"
	22
	23	#define OPENBRACE 0x007B
	24	#define CLOSEBRACE 0x007D
	25	#define COMMA 0x002C
	26	#define QUOTE 0x0022
	27	#define ESCAPE 0x005C
	28	#define SLASH 0x002F
	29	#define ASTERISK 0x002A
	30	#define SPACE 0x0020
	31	#define COLON 0x003A
	32	#define BADBOM 0xFFFE
374ca955 A	33	#define CR 0x000D
	34	#define LF 0x000A
	35
b75a7d8f A	36	static int32_t lineCount;
	37
	38	/* Protos */
	39	static enum ETokenType getStringToken(UCHARBUF *buf,
	40	UChar32 initialChar,
	41	struct UString *token,
	42	UErrorCode *status);
	43
374ca955 A	44	static UChar32 getNextChar (UCHARBUF *buf, UBool skipwhite, struct UString *token, UErrorCode *status);
	45	static void seekUntilNewline (UCHARBUF *buf, struct UString *token, UErrorCode *status);
	46	static void seekUntilEndOfComment (UCHARBUF *buf, struct UString *token, UErrorCode *status);
b75a7d8f A	47	static UBool isWhitespace (UChar32 c);
	48	static UBool isNewline (UChar32 c);
	49
4388f060	50	U_CFUNC void resetLineNumber() {
b75a7d8f A	51	lineCount = 1;
	52	}
	53
	54	/* Read and return the next token from the stream. If the token is of
	55	type eString, fill in the token parameter with the token. If the
	56	token is eError, then the status parameter will contain the
	57	specific error. This will be eItemNotFound at the end of file,
	58	indicating that all tokens have been returned. This method will
	59	never return eString twice in a row; instead, multiple adjacent
	60	string tokens will be merged into one, with no intervening
	61	space. */
4388f060 A	62	U_CFUNC enum ETokenType
	63	getNextToken(UCHARBUF* buf,
	64	struct UString *token,
	65	uint32_t *linenumber, /* out: linenumber of token */
	66	struct UString *comment,
	67	UErrorCode *status) {
b75a7d8f A	68	enum ETokenType result;
	69	UChar32 c;
	70
	71	if (U_FAILURE(*status)) {
	72	return TOK_ERROR;
	73	}
	74
	75	/* Skip whitespace */
374ca955	76	c = getNextChar(buf, TRUE, comment, status);
b75a7d8f A	77
	78	if (U_FAILURE(*status)) {
	79	return TOK_ERROR;
	80	}
	81
	82	*linenumber = lineCount;
	83
	84	switch(c) {
	85	case BADBOM:
	86	return TOK_ERROR;
	87	case OPENBRACE:
	88	return TOK_OPEN_BRACE;
	89	case CLOSEBRACE:
	90	return TOK_CLOSE_BRACE;
	91	case COMMA:
	92	return TOK_COMMA;
	93	case U_EOF:
	94	return TOK_EOF;
	95	case COLON:
	96	return TOK_COLON;
	97
	98	default:
	99	result = getStringToken(buf, c, token, status);
	100	}
	101
	102	*linenumber = lineCount;
	103	return result;
	104	}
	105
	106	/* Copy a string token into the given UnicodeString. Upon entry, we
	107	have already read the first character of the string token, which is
	108	not a whitespace character (but may be a QUOTE or ESCAPE). This
	109	function reads all subsequent characters that belong with this
	110	string, and copy them into the token parameter. The other
	111	important, and slightly convoluted purpose of this function is to
	112	merge adjacent strings. It looks forward a bit, and if the next
	113	non comment, non whitespace item is a string, it reads it in as
	114	well. If two adjacent strings are quoted, they are merged without
	115	intervening space. Otherwise a single SPACE character is
	116	inserted. */
	117	static enum ETokenType getStringToken(UCHARBUF* buf,
	118	UChar32 initialChar,
	119	struct UString *token,
	120	UErrorCode *status) {
	121	UBool lastStringWasQuoted;
	122	UChar32 c;
	123	UChar target[3] = { '\0' };
	124	UChar *pTarget = target;
	125	int len=0;
	126	UBool isFollowingCharEscaped=FALSE;
374ca955 A	127	UBool isNLUnescaped = FALSE;
374ca955 A	128	UChar32 prevC=0;
b75a7d8f A	129
	130	/* We are guaranteed on entry that initialChar is not a whitespace
	131	character. If we are at the EOF, or have some other problem, it
	132	doesn't matter; we still want to validly return the initialChar
	133	(if nothing else) as a string token. */
	134
	135	if (U_FAILURE(*status)) {
	136	return TOK_ERROR;
	137	}
	138
	139	/* setup */
	140	lastStringWasQuoted = FALSE;
	141	c = initialChar;
	142	ustr_setlen(token, 0, status);
	143
	144	if (U_FAILURE(*status)) {
	145	return TOK_ERROR;
	146	}
	147
	148	for (;;) {
	149	if (c == QUOTE) {
	150	if (!lastStringWasQuoted && token->fLength > 0) {
	151	ustr_ucat(token, SPACE, status);
	152
	153	if (U_FAILURE(*status)) {
	154	return TOK_ERROR;
	155	}
	156	}
	157
	158	lastStringWasQuoted = TRUE;
	159
	160	for (;;) {
	161	c = ucbuf_getc(buf,status);
	162
	163	/* EOF reached */
	164	if (c == U_EOF) {
	165	return TOK_EOF;
	166	}
	167
	168	/* Unterminated quoted strings */
	169	if (U_FAILURE(*status)) {
	170	return TOK_ERROR;
	171	}
	172
	173	if (c == QUOTE && !isFollowingCharEscaped) {
	174	break;
	175	}
	176
	177	if (c == ESCAPE && !isFollowingCharEscaped) {
	178	pTarget = target;
	179	c = unescape(buf, status);
	180
	181	if (c == U_ERR) {
	182	return TOK_ERROR;
	183	}
374ca955 A	184	if(c == CR || c == LF){
	185	isNLUnescaped = TRUE;
	186	}
b75a7d8f A	187	}
	188
	189	if(c==ESCAPE && !isFollowingCharEscaped){
	190	isFollowingCharEscaped = TRUE;
	191	}else{
	192	U_APPEND_CHAR32(c, pTarget,len);
	193	pTarget = target;
	194	ustr_uscat(token, pTarget,len, status);
	195	isFollowingCharEscaped = FALSE;
	196	len=0;
374ca955 A	197	if(c == CR || c == LF){
	198	if(isNLUnescaped == FALSE && prevC!=CR){
	199	lineCount++;
	200	}
	201	isNLUnescaped = FALSE;
	202	}
b75a7d8f A	203	}
	204
	205	if (U_FAILURE(*status)) {
	206	return TOK_ERROR;
	207	}
374ca955	208	prevC = c;
b75a7d8f A	209	}
	210	} else {
	211	if (token->fLength > 0) {
	212	ustr_ucat(token, SPACE, status);
	213
	214	if (U_FAILURE(*status)) {
	215	return TOK_ERROR;
	216	}
	217	}
	218
	219	if(lastStringWasQuoted){
	220	if(getShowWarning()){
	221	warning(lineCount, "Mixing quoted and unquoted strings");
	222	}
	223	if(isStrict()){
	224	return TOK_ERROR;
	225	}
	226
	227	}
	228
	229	lastStringWasQuoted = FALSE;
	230
	231	/* if we reach here we are mixing
	232	* quoted and unquoted strings
	233	* warn in normal mode and error in
	234	* pedantic mode
	235	*/
	236
	237	if (c == ESCAPE) {
	238	pTarget = target;
	239	c = unescape(buf, status);
	240
	241	/* EOF reached */
	242	if (c == U_EOF) {
	243	return TOK_ERROR;
	244	}
	245	}
	246
	247	U_APPEND_CHAR32(c, pTarget,len);
	248	pTarget = target;
	249	ustr_uscat(token, pTarget,len, status);
	250	len=0;
374ca955	251
b75a7d8f A	252	if (U_FAILURE(*status)) {
	253	return TOK_ERROR;
	254	}
	255
	256	for (;;) {
	257	/* DON'T skip whitespace */
374ca955	258	c = getNextChar(buf, FALSE, NULL, status);
b75a7d8f A	259
	260	/* EOF reached */
	261	if (c == U_EOF) {
	262	ucbuf_ungetc(c, buf);
	263	return TOK_STRING;
	264	}
	265
	266	if (U_FAILURE(*status)) {
	267	return TOK_STRING;
	268	}
	269
	270	if (c == QUOTE
	271	|| c == OPENBRACE
	272	|| c == CLOSEBRACE
	273	|| c == COMMA
	274	|| c == COLON) {
	275	ucbuf_ungetc(c, buf);
	276	break;
	277	}
	278
	279	if (isWhitespace(c)) {
	280	break;
	281	}
	282
	283	if (c == ESCAPE) {
	284	pTarget = target;
	285	c = unescape(buf, status);
	286
	287	if (c == U_ERR) {
	288	return TOK_ERROR;
	289	}
	290	}
	291
	292	U_APPEND_CHAR32(c, pTarget,len);
	293	pTarget = target;
	294	ustr_uscat(token, pTarget,len, status);
	295	len=0;
	296	if (U_FAILURE(*status)) {
	297	return TOK_ERROR;
	298	}
	299	}
	300	}
	301
	302	/* DO skip whitespace */
374ca955	303	c = getNextChar(buf, TRUE, NULL, status);
b75a7d8f A	304
	305	if (U_FAILURE(*status)) {
	306	return TOK_STRING;
	307	}
	308
	309	if (c == OPENBRACE || c == CLOSEBRACE || c == COMMA || c == COLON) {
	310	ucbuf_ungetc(c, buf);
	311	return TOK_STRING;
	312	}
	313	}
	314	}
	315
374ca955	316	/* Retrieve the next character. If skipwhite is
b75a7d8f A	317	true, whitespace is skipped as well. */
	318	static UChar32 getNextChar(UCHARBUF* buf,
	319	UBool skipwhite,
374ca955	320	struct UString *token,
b75a7d8f	321	UErrorCode *status) {
374ca955	322	UChar32 c, c2;
b75a7d8f A	323
	324	if (U_FAILURE(*status)) {
	325	return U_EOF;
	326	}
	327
	328	for (;;) {
	329	c = ucbuf_getc(buf,status);
	330
	331	if (c == U_EOF) {
	332	return U_EOF;
	333	}
	334
	335	if (skipwhite && isWhitespace(c)) {
	336	continue;
	337	}
	338
	339	/* This also handles the get() failing case */
	340	if (c != SLASH) {
	341	return c;
	342	}
	343
46f4442e	344	c = ucbuf_getc(buf,status); /* "/c" */
b75a7d8f A	345
	346	if (c == U_EOF) {
	347	return U_EOF;
	348	}
	349
	350	switch (c) {
46f4442e	351	case SLASH: /* "//" */
374ca955	352	seekUntilNewline(buf, NULL, status);
b75a7d8f A	353	break;
b75a7d8f A	354
729e4ab9 A	355	case ASTERISK: /* " / * " */
	356	c2 = ucbuf_getc(buf, status); /* "/ * c" */
	357	if(c2 == ASTERISK){ /* "/ * *" */
374ca955 A	358	/* parse multi-line comment and store it in token*/
374ca955 A	359	seekUntilEndOfComment(buf, token, status);
46f4442e	360	} else {
729e4ab9	361	ucbuf_ungetc(c2, buf); /* c2 is the non-asterisk following "/ *". Include c2 back in buffer. */
374ca955 A	362	seekUntilEndOfComment(buf, NULL, status);
374ca955 A	363	}
b75a7d8f A	364	break;
	365
	366	default:
46f4442e	367	ucbuf_ungetc(c, buf); /* "/c" - put back the c */
b75a7d8f A	368	/* If get() failed this is a NOP */
	369	return SLASH;
	370	}
374ca955	371
b75a7d8f A	372	}
	373	}
	374
	375	static void seekUntilNewline(UCHARBUF* buf,
374ca955	376	struct UString *token,
b75a7d8f A	377	UErrorCode *status) {
	378	UChar32 c;
	379
	380	if (U_FAILURE(*status)) {
	381	return;
	382	}
	383
	384	do {
	385	c = ucbuf_getc(buf,status);
374ca955 A	386	/* add the char to token */
	387	if(token!=NULL){
	388	ustr_u32cat(token, c, status);
	389	}
b75a7d8f A	390	} while (!isNewline(c) && c != U_EOF && *status == U_ZERO_ERROR);
	391	}
	392
	393	static void seekUntilEndOfComment(UCHARBUF *buf,
374ca955	394	struct UString *token,
b75a7d8f A	395	UErrorCode *status) {
	396	UChar32 c, d;
	397	uint32_t line;
	398
	399	if (U_FAILURE(*status)) {
	400	return;
	401	}
	402
	403	line = lineCount;
	404
	405	do {
	406	c = ucbuf_getc(buf, status);
	407
	408	if (c == ASTERISK) {
	409	d = ucbuf_getc(buf, status);
	410
	411	if (d != SLASH) {
	412	ucbuf_ungetc(d, buf);
	413	} else {
	414	break;
	415	}
	416	}
374ca955 A	417	/* add the char to token */
	418	if(token!=NULL){
	419	ustr_u32cat(token, c, status);
	420	}
	421	/* increment the lineCount */
	422	isNewline(c);
	423
b75a7d8f A	424	} while (c != U_EOF && *status == U_ZERO_ERROR);
	425
	426	if (c == U_EOF) {
	427	*status = U_INVALID_FORMAT_ERROR;
	428	error(line, "unterminated comment detected");
	429	}
	430	}
	431
4388f060	432	U_CFUNC UChar32 unescape(UCHARBUF *buf, UErrorCode *status) {
b75a7d8f A	433	if (U_FAILURE(*status)) {
	434	return U_EOF;
	435	}
	436
	437	/* We expect to be called after the ESCAPE has been seen, but
	438	* u_fgetcx needs an ESCAPE to do its magic. */
	439	ucbuf_ungetc(ESCAPE, buf);
	440
	441	return ucbuf_getcx32(buf, status);
	442	}
	443
	444	static UBool isWhitespace(UChar32 c) {
	445	switch (c) {
	446	/* ' ', '\t', '\n', '\r', 0x2029, 0xFEFF */
	447	case 0x000A:
	448	case 0x2029:
	449	lineCount++;
	450	case 0x000D:
	451	case 0x0020:
	452	case 0x0009:
	453	case 0xFEFF:
	454	return TRUE;
	455
	456	default:
	457	return FALSE;
	458	}
	459	}
	460
	461	static UBool isNewline(UChar32 c) {
	462	switch (c) {
	463	/* '\n', '\r', 0x2029 */
	464	case 0x000A:
	465	case 0x2029:
	466	lineCount++;
	467	case 0x000D:
	468	return TRUE;
	469
	470	default:
	471	return FALSE;
	472	}
	473	}