[apple/icu.git] / icuSources / tools / genrb / read.c

/*
*******************************************************************************
*
*   Copyright (C) 1998-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File read.c
*
* Modification History:
*
*   Date        Name        Description
*   05/26/99    stephen     Creation.
*   5/10/01     Ram         removed ustdio dependency
*******************************************************************************
*/

#include "read.h"
#include "errmsg.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"

/* Code points that have a special meaning to the tokenizer below. */
#define OPENBRACE    0x007B  /* '{'  - returned as TOK_OPEN_BRACE */
#define CLOSEBRACE   0x007D  /* '}'  - returned as TOK_CLOSE_BRACE */
#define COMMA        0x002C  /* ','  - returned as TOK_COMMA */
#define QUOTE        0x0022  /* '"'  - delimits a quoted string segment */
#define ESCAPE       0x005C  /* '\\' - introduces an escape sequence */
#define SLASH        0x002F  /* '/'  - may start a comment */
#define ASTERISK     0x002A  /* '*'  - second character of a block comment opener */
#define SPACE        0x0020  /* ' '  - inserted between merged string segments */
#define COLON        0x003A  /* ':'  - returned as TOK_COLON */
#define BADBOM       0xFFFE  /* getNextToken() returns TOK_ERROR for this value;
                                presumably a byte-swapped U+FEFF - TODO confirm */
#define CR           0x000D  /* carriage return */
#define LF           0x000A  /* line feed */
               
/* Current line number in the input.  Reset to 1 by resetLineNumber() and
   incremented by isWhitespace(), isNewline() and getStringToken(). */
static int32_t lineCount;

/* Prototypes for the file-local helpers defined further down. */
static enum ETokenType getStringToken(UCHARBUF *buf,
                                      UChar32 initialChar,
                                      struct UString *token,
                                      UErrorCode *status);

static UChar32 getNextChar           (UCHARBUF *buf, UBool skipwhite, struct UString *token, UErrorCode *status);
static void    seekUntilNewline      (UCHARBUF *buf, struct UString *token, UErrorCode *status);
static void    seekUntilEndOfComment (UCHARBUF *buf, struct UString *token, UErrorCode *status);
static UBool   isWhitespace          (UChar32 c);
static UBool   isNewline             (UChar32 c);

/* Restart line counting: the file-level line counter is set back to 1.
   The parameter list is spelled (void) so that this definition is a real
   prototype; in C an empty list leaves the parameters unspecified. */
U_CFUNC void resetLineNumber(void) {
    lineCount = 1;
}

/* Read and return the next token from the stream.
 *
 * Leading whitespace and comments are skipped by getNextChar(); the
 * comment parameter is handed to it as the place where the text of a
 * skipped documentation comment may be stored.
 *
 * Return value:
 *   TOK_ERROR        status was already failing, reading the first
 *                    character failed, or the first character is BADBOM
 *                    (this function does not modify status for BADBOM).
 *   TOK_OPEN_BRACE,
 *   TOK_CLOSE_BRACE,
 *   TOK_COMMA,
 *   TOK_COLON        the corresponding single punctuation character.
 *   TOK_EOF          the end of the input was reached.
 *   otherwise        whatever getStringToken() returns; on TOK_STRING the
 *                    token parameter holds the string.  Adjacent string
 *                    segments are merged into one token there, so two
 *                    consecutive calls do not normally both yield
 *                    TOK_STRING.
 *
 * *linenumber receives the current line count once the first character
 * of the token has been read, and again after a string token has been
 * read completely.
 */
U_CFUNC enum ETokenType
getNextToken(UCHARBUF* buf,
             struct UString *token,
             uint32_t *linenumber, /* out: linenumber of token */
             struct UString *comment,
             UErrorCode *status) {
    enum ETokenType kind;
    UChar32         first;

    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    /* First significant character: whitespace and comments are skipped. */
    first = getNextChar(buf, TRUE, comment, status);
    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    *linenumber = lineCount;

    /* Single-character tokens and end of input are decided right here. */
    if (first == BADBOM) {
        return TOK_ERROR;
    }
    if (first == OPENBRACE) {
        return TOK_OPEN_BRACE;
    }
    if (first == CLOSEBRACE) {
        return TOK_CLOSE_BRACE;
    }
    if (first == COMMA) {
        return TOK_COMMA;
    }
    if (first == U_EOF) {
        return TOK_EOF;
    }
    if (first == COLON) {
        return TOK_COLON;
    }

    /* Anything else begins a string token. */
    kind = getStringToken(buf, first, token, status);

    /* A string may span several lines; report the line it ended on. */
    *linenumber = lineCount;
    return kind;
}

/* Read one string token into the given UString.  Upon entry, the
   first character of the token has already been read; it is not a
   whitespace character (but may be a QUOTE or ESCAPE).  This
   function reads all subsequent characters that belong to the
   string and appends them to the token parameter, whose length is
   first reset to zero.

   The other important, and slightly convoluted, purpose of this
   function is to merge adjacent strings.  After each segment it
   skips whitespace and comments, and unless the next character is
   an OPENBRACE, CLOSEBRACE, COMMA or COLON it is treated as the
   start of a further segment of the same token.  If two adjacent
   segments are both quoted, they are merged without intervening
   space.  Otherwise a single SPACE character is inserted.

   Returns TOK_STRING when the token is complete, TOK_EOF when the
   input ends inside a quoted segment, and TOK_ERROR on failure
   (including a quoted segment followed by an unquoted one while
   isStrict() is true).  Note that a failing status detected right
   after a getNextChar() call still yields TOK_STRING, so callers
   have to inspect status as well. */
static enum ETokenType getStringToken(UCHARBUF* buf,
                                      UChar32 initialChar,
                                      struct UString *token,
                                      UErrorCode *status) {
    UBool    lastStringWasQuoted;
    UChar32  c;
    /* Scratch area handed to U_APPEND_CHAR32 before each append to token;
       presumably receives the UTF-16 code units of one code point and
       advances pTarget/len accordingly - TODO confirm against the macro. */
    UChar    target[3] = { '\0' };
    UChar    *pTarget   = target;
    int      len=0;
    /* TRUE while the next character of a quoted segment must be taken
       literally because unescape() handed back a bare ESCAPE. */
    UBool    isFollowingCharEscaped=FALSE;
    /* TRUE when the current CR/LF came out of an escape sequence and
       therefore must not be counted as a source line break. */
    UBool    isNLUnescaped = FALSE;
    /* Previous character seen inside a quoted segment (for CR LF pairs). */
    UChar32  prevC=0;

    /* We are guaranteed on entry that initialChar is not a whitespace
       character. If we are at the EOF, or have some other problem, it
       doesn't matter; we still want to validly return the initialChar
       (if nothing else) as a string token. */

    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    /* setup */
    lastStringWasQuoted = FALSE;
    c = initialChar;
    ustr_setlen(token, 0, status);

    if (U_FAILURE(*status)) {
        return TOK_ERROR;
    }

    /* One iteration per string segment; c holds the segment's first
       character on entry to each iteration. */
    for (;;) {
        if (c == QUOTE) {
            /* Quoted segment.  A separating SPACE is only added when the
               previous segment was unquoted. */
            if (!lastStringWasQuoted && token->fLength > 0) {
                ustr_ucat(token, SPACE, status);

                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }

            lastStringWasQuoted = TRUE;

            /* Read raw characters (whitespace and comment markers are not
               special here) until the closing, unescaped QUOTE. */
            for (;;) {
                c = ucbuf_getc(buf,status);

                /* EOF reached inside the quoted segment, i.e. the string
                   is unterminated; reported as TOK_EOF. */
                if (c == U_EOF) {
                    return TOK_EOF;
                }

                /* Reading the character failed. */
                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }

                /* Closing quote, unless it is being taken literally. */
                if (c == QUOTE && !isFollowingCharEscaped) {
                    break;
                }

                if (c == ESCAPE  && !isFollowingCharEscaped) {
                    pTarget = target;
                    c       = unescape(buf, status);

                    if (c == U_ERR) {
                        return TOK_ERROR;
                    }
                    /* Remember that this line break was written as an
                       escape sequence, not as a real line break. */
                    if(c == CR || c == LF){
                        isNLUnescaped = TRUE;
                    }
                }               

                if(c==ESCAPE && !isFollowingCharEscaped){
                    /* unescape() returned ESCAPE itself (presumably the
                       sequence is not one it decodes - TODO confirm):
                       append nothing now and take the next character
                       literally, even if it is a QUOTE or an ESCAPE. */
                    isFollowingCharEscaped = TRUE;
                }else{
                    U_APPEND_CHAR32(c, pTarget,len);
                    pTarget = target;
                    ustr_uscat(token, pTarget,len, status);
                    isFollowingCharEscaped = FALSE;
                    len=0;
                    /* Count a source line for a literal CR or LF, but not
                       for one that came from an escape sequence, and not
                       when the previous character was CR (so a CR LF pair
                       is counted once). */
                    if(c == CR || c == LF){
                        if(isNLUnescaped == FALSE && prevC!=CR){
                            lineCount++;
                        }
                        isNLUnescaped = FALSE;
                    }
                }
                
                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
                prevC = c;
            }
        } else {
            /* Unquoted segment: always separated from a preceding segment
               by a single SPACE. */
            if (token->fLength > 0) {
                ustr_ucat(token, SPACE, status);

                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }
            
            /* An unquoted segment directly after a quoted one means quoted
               and unquoted strings are being mixed: warn when warnings are
               shown, and fail when isStrict() is true. */
            if(lastStringWasQuoted){
                if(getShowWarning()){
                    warning(lineCount, "Mixing quoted and unquoted strings");
                }
                if(isStrict()){
                    return TOK_ERROR;
                }

            }

            lastStringWasQuoted = FALSE;
            
            /* Handle the first character of the segment, which was read
               by the caller or by the look-ahead at the bottom of the
               outer loop. */

            if (c == ESCAPE) {
                pTarget = target;
                c       = unescape(buf, status);

                /* NOTE(review): this site tests for U_EOF while the other
                   two unescape() call sites test for U_ERR - confirm which
                   is intended. */
                if (c == U_EOF) {
                    return TOK_ERROR;
                }
            }

            U_APPEND_CHAR32(c, pTarget,len);
            pTarget = target;
            ustr_uscat(token, pTarget,len, status);
            len=0;
            
            if (U_FAILURE(*status)) {
                return TOK_ERROR;
            }

            /* Remaining characters of the unquoted segment. */
            for (;;) {
                /* DON'T skip whitespace */
                c = getNextChar(buf, FALSE, NULL, status);

                /* EOF reached: the token ends here. */
                if (c == U_EOF) {
                    ucbuf_ungetc(c, buf);
                    return TOK_STRING;
                }

                /* A read failure still returns TOK_STRING; status carries
                   the error. */
                if (U_FAILURE(*status)) {
                    return TOK_STRING;
                }

                /* A quote or a structural character ends the segment and
                   is pushed back for whoever reads next. */
                if (c == QUOTE
                        || c == OPENBRACE
                        || c == CLOSEBRACE
                        || c == COMMA
                        || c == COLON) {
                    ucbuf_ungetc(c, buf);
                    break;
                }

                /* Whitespace ends the segment too, but is consumed (and
                   counted as a line where isWhitespace() does so). */
                if (isWhitespace(c)) {
                    break;
                }

                if (c == ESCAPE) {
                    pTarget = target;
                    c       = unescape(buf, status);

                    if (c == U_ERR) {
                        return TOK_ERROR;
                    }
                }

                U_APPEND_CHAR32(c, pTarget,len);
                pTarget = target;
                ustr_uscat(token, pTarget,len, status);
                len=0;
                if (U_FAILURE(*status)) {
                    return TOK_ERROR;
                }
            }
        }

        /* Look ahead past whitespace and comments for the next item. */
        /* DO skip whitespace */
        c = getNextChar(buf, TRUE, NULL, status);

        if (U_FAILURE(*status)) {
            return TOK_STRING;
        }

        /* A structural character ends the whole token; push it back so
           the next getNextToken() call sees it.  Anything else becomes
           the first character of the next segment. */
        if (c == OPENBRACE || c == CLOSEBRACE || c == COMMA || c == COLON) {
            ucbuf_ungetc(c, buf);
            return TOK_STRING;
        }
    }
}

/* Retrieve the next character that is not part of a comment.  If
   skipwhite is true, whitespace is skipped as well.

   Comments are consumed here:
     - two slashes start a comment that runs to the end of the line;
     - slash-asterisk starts a block comment that is discarded;
     - slash-asterisk-asterisk starts a block comment whose text is
       appended to token when token is not NULL.
   An empty block comment written as slash, asterisk, asterisk, slash is
   recognized as complete.  Without that check its closing slash would be
   taken as comment text and scanning would continue to the next comment
   terminator or to the end of the file.

   A slash that does not start a comment is returned as SLASH, with the
   character after it pushed back.

   Returns U_EOF if status is already failing or the input is exhausted.
   Errors raised while skipping a comment are reported through status
   only, so callers must check status after every call. */
static UChar32 getNextChar(UCHARBUF* buf,
                           UBool skipwhite,
                           struct UString *token,
                           UErrorCode *status) {
    UChar32 c, c2, c3;

    if (U_FAILURE(*status)) {
        return U_EOF;
    }

    for (;;) {
        c = ucbuf_getc(buf,status);

        if (c == U_EOF) {
            return U_EOF;
        }

        /* isWhitespace() also bumps the line counter on line breaks. */
        if (skipwhite && isWhitespace(c)) {
            continue;
        }

        /* This also handles the get() failing case */
        if (c != SLASH) {
            return c;
        }

        c = ucbuf_getc(buf,status); /* "/c" */

        if (c == U_EOF) {
            return U_EOF;
        }

        switch (c) {
        case SLASH:  /* "//" */
            seekUntilNewline(buf, NULL, status);
            break;

        case ASTERISK:  /* " / * " */
            c2 = ucbuf_getc(buf, status); /* "/ * c" */
            if(c2 == ASTERISK){  /* "/ * *" */
                c3 = ucbuf_getc(buf, status); /* "/ * * c" */
                if(c3 == SLASH){
                    /* "/ * * /" is an empty comment that has now been
                       consumed completely; resume scanning after it. */
                    break;
                }
                ucbuf_ungetc(c3, buf); /* first character of the comment text */
                /* parse multi-line comment and store it in token*/
                seekUntilEndOfComment(buf, token, status);
            } else {
                ucbuf_ungetc(c2, buf); /* c2 is the non-asterisk following "/ *".  Include c2  back in buffer.  */
                seekUntilEndOfComment(buf, NULL, status);
            }
            break;

        default:
            ucbuf_ungetc(c, buf); /* "/c" - put back the c */
            /* If get() failed this is a NOP */
            return SLASH;
        }

    }
}

/* Consume input through the end of the current line.
 *
 * Characters are read until isNewline() accepts one, the end of the
 * input is reached, or status is no longer U_ZERO_ERROR.  Every character
 * read, the terminating one included, is appended to token when token is
 * not NULL.  isNewline() is called exactly once per character because it
 * also maintains the line counter.  Does nothing if status is already
 * failing on entry.
 */
static void seekUntilNewline(UCHARBUF* buf,
                             struct UString *token,
                             UErrorCode *status) {
    UChar32 c;

    if (U_FAILURE(*status)) {
        return;
    }

    for (;;) {
        c = ucbuf_getc(buf,status);

        /* add the char to token */
        if (token != NULL) {
            ustr_u32cat(token, c, status);
        }

        if (isNewline(c)) {
            break;
        }
        if (c == U_EOF) {
            break;
        }
        if (*status != U_ZERO_ERROR) {
            break;
        }
    }
}

/* Consume the body of a block comment, up to and including the
 * terminating asterisk-slash pair.
 *
 * Each character of the body is appended to token when token is not
 * NULL; the terminating pair itself is not appended.  Line breaks inside
 * the comment advance the line counter through isNewline().  If the input
 * ends before the terminator is found, status is set to
 * U_INVALID_FORMAT_ERROR and an error is reported against the line on
 * which this function was entered.  Does nothing if status is already
 * failing on entry.
 */
static void seekUntilEndOfComment(UCHARBUF *buf,
                                  struct UString *token,
                                  UErrorCode *status) {
    UChar32  c, d;
    uint32_t line;

    if (U_FAILURE(*status)) {
        return;
    }

    /* Remember where we started, for the error message. */
    line = lineCount;

    for (;;) {
        c = ucbuf_getc(buf, status);

        if (c == ASTERISK) {
            /* Peek at the following character: a slash closes the comment. */
            d = ucbuf_getc(buf, status);

            if (d == SLASH) {
                break;
            }
            ucbuf_ungetc(d, buf);
        }

        /* add the char to token */
        if (token != NULL) {
            ustr_u32cat(token, c, status);
        }

        /* called for its side effect only: increment the lineCount */
        isNewline(c);

        if (c == U_EOF || *status != U_ZERO_ERROR) {
            break;
        }
    }

    if (c == U_EOF) {
        *status = U_INVALID_FORMAT_ERROR;
        error(line, "unterminated comment detected");
    }
}

/* Decode the escape sequence at the current position of buf.
 *
 * The caller has already consumed the ESCAPE character, but
 * ucbuf_getcx32() needs to see it, so it is pushed back first and the
 * whole sequence is then read in one go.  Returns the value produced by
 * ucbuf_getcx32(), or U_EOF without touching buf when status is already
 * failing.
 */
U_CFUNC UChar32 unescape(UCHARBUF *buf, UErrorCode *status) {
    UChar32 decoded = U_EOF;

    if (!U_FAILURE(*status)) {
        ucbuf_ungetc(ESCAPE, buf);
        decoded = ucbuf_getcx32(buf, status);
    }

    return decoded;
}

/* Tell whether c is one of the characters the tokenizer treats as
 * whitespace: U+000A, U+2029, U+000D, U+0020, U+0009 or U+FEFF.
 *
 * Side effect: U+000A and U+2029 also advance the file-level line
 * counter, so this must be called exactly once per character read.
 * U+000D does not advance the counter.
 */
static UBool isWhitespace(UChar32 c) {
    /* Line-ending whitespace: counts as a new line. */
    if (c == 0x000A || c == 0x2029) {
        lineCount++;
        return TRUE;
    }

    /* Whitespace that leaves the line counter alone. */
    if (c == 0x000D || c == 0x0020 || c == 0x0009 || c == 0xFEFF) {
        return TRUE;
    }

    return FALSE;
}

/* Tell whether c is a line-break character: U+000A, U+2029 or U+000D.
 *
 * Side effect: U+000A and U+2029 also advance the file-level line
 * counter; U+000D is reported as a newline without advancing it.
 */
static UBool isNewline(UChar32 c) {
    if (c == 0x000A || c == 0x2029) {
        lineCount++;
        return TRUE;
    }

    if (c == 0x000D) {
        return TRUE;
    }

    return FALSE;
}
Commit	Line	Data
b75a7d8f A	1	/*
	2	*******************************************************************************
	3	*
51004dcb	4	* Copyright (C) 1998-2012, International Business Machines
b75a7d8f A	5	* Corporation and others. All Rights Reserved.
	6	*
	7	*******************************************************************************
	8	*
	9	* File read.c
	10	*
	11	* Modification History:
	12	*
	13	* Date Name Description
	14	* 05/26/99 stephen Creation.
	15	* 5/10/01 Ram removed ustdio dependency
	16	*******************************************************************************
	17	*/
	18
	19	#include "read.h"
	20	#include "errmsg.h"
	21	#include "unicode/ustring.h"
51004dcb	22	#include "unicode/utf16.h"
b75a7d8f A	23
	24	#define OPENBRACE 0x007B
	25	#define CLOSEBRACE 0x007D
	26	#define COMMA 0x002C
	27	#define QUOTE 0x0022
	28	#define ESCAPE 0x005C
	29	#define SLASH 0x002F
	30	#define ASTERISK 0x002A
	31	#define SPACE 0x0020
	32	#define COLON 0x003A
	33	#define BADBOM 0xFFFE
374ca955 A	34	#define CR 0x000D
	35	#define LF 0x000A
	36
b75a7d8f A	37	static int32_t lineCount;
	38
	39	/* Protos */
	40	static enum ETokenType getStringToken(UCHARBUF *buf,
	41	UChar32 initialChar,
	42	struct UString *token,
	43	UErrorCode *status);
	44
374ca955 A	45	static UChar32 getNextChar (UCHARBUF buf, UBool skipwhite, struct UString token, UErrorCode *status);
	46	static void seekUntilNewline (UCHARBUF buf, struct UString token, UErrorCode *status);
	47	static void seekUntilEndOfComment (UCHARBUF buf, struct UString token, UErrorCode *status);
b75a7d8f A	48	static UBool isWhitespace (UChar32 c);
	49	static UBool isNewline (UChar32 c);
	50
4388f060	51	U_CFUNC void resetLineNumber() {
b75a7d8f A	52	lineCount = 1;
	53	}
	54
	55	/* Read and return the next token from the stream. If the token is of
	56	type eString, fill in the token parameter with the token. If the
	57	token is eError, then the status parameter will contain the
	58	specific error. This will be eItemNotFound at the end of file,
	59	indicating that all tokens have been returned. This method will
	60	never return eString twice in a row; instead, multiple adjacent
	61	string tokens will be merged into one, with no intervening
	62	space. */
4388f060 A	63	U_CFUNC enum ETokenType
	64	getNextToken(UCHARBUF* buf,
	65	struct UString *token,
	66	uint32_t linenumber, / out: linenumber of token */
	67	struct UString *comment,
	68	UErrorCode *status) {
b75a7d8f A	69	enum ETokenType result;
	70	UChar32 c;
	71
	72	if (U_FAILURE(*status)) {
	73	return TOK_ERROR;
	74	}
	75
	76	/* Skip whitespace */
374ca955	77	c = getNextChar(buf, TRUE, comment, status);
b75a7d8f A	78
	79	if (U_FAILURE(*status)) {
	80	return TOK_ERROR;
	81	}
	82
	83	*linenumber = lineCount;
	84
	85	switch(c) {
	86	case BADBOM:
	87	return TOK_ERROR;
	88	case OPENBRACE:
	89	return TOK_OPEN_BRACE;
	90	case CLOSEBRACE:
	91	return TOK_CLOSE_BRACE;
	92	case COMMA:
	93	return TOK_COMMA;
	94	case U_EOF:
	95	return TOK_EOF;
	96	case COLON:
	97	return TOK_COLON;
	98
	99	default:
	100	result = getStringToken(buf, c, token, status);
	101	}
	102
	103	*linenumber = lineCount;
	104	return result;
	105	}
	106
	107	/* Copy a string token into the given UnicodeString. Upon entry, we
	108	have already read the first character of the string token, which is
	109	not a whitespace character (but may be a QUOTE or ESCAPE). This
	110	function reads all subsequent characters that belong with this
	111	string, and copy them into the token parameter. The other
	112	important, and slightly convoluted purpose of this function is to
	113	merge adjacent strings. It looks forward a bit, and if the next
	114	non comment, non whitespace item is a string, it reads it in as
	115	well. If two adjacent strings are quoted, they are merged without
	116	intervening space. Otherwise a single SPACE character is
	117	inserted. */
	118	static enum ETokenType getStringToken(UCHARBUF* buf,
	119	UChar32 initialChar,
	120	struct UString *token,
	121	UErrorCode *status) {
	122	UBool lastStringWasQuoted;
	123	UChar32 c;
	124	UChar target[3] = { '\0' };
	125	UChar *pTarget = target;
	126	int len=0;
	127	UBool isFollowingCharEscaped=FALSE;
374ca955 A	128	UBool isNLUnescaped = FALSE;
374ca955 A	129	UChar32 prevC=0;
b75a7d8f A	130
	131	/* We are guaranteed on entry that initialChar is not a whitespace
	132	character. If we are at the EOF, or have some other problem, it
	133	doesn't matter; we still want to validly return the initialChar
	134	(if nothing else) as a string token. */
	135
	136	if (U_FAILURE(*status)) {
	137	return TOK_ERROR;
	138	}
	139
	140	/* setup */
	141	lastStringWasQuoted = FALSE;
	142	c = initialChar;
	143	ustr_setlen(token, 0, status);
	144
	145	if (U_FAILURE(*status)) {
	146	return TOK_ERROR;
	147	}
	148
	149	for (;;) {
	150	if (c == QUOTE) {
	151	if (!lastStringWasQuoted && token->fLength > 0) {
	152	ustr_ucat(token, SPACE, status);
	153
	154	if (U_FAILURE(*status)) {
	155	return TOK_ERROR;
	156	}
	157	}
	158
	159	lastStringWasQuoted = TRUE;
	160
	161	for (;;) {
	162	c = ucbuf_getc(buf,status);
	163
	164	/* EOF reached */
	165	if (c == U_EOF) {
	166	return TOK_EOF;
	167	}
	168
	169	/* Unterminated quoted strings */
	170	if (U_FAILURE(*status)) {
	171	return TOK_ERROR;
	172	}
	173
	174	if (c == QUOTE && !isFollowingCharEscaped) {
	175	break;
	176	}
	177
	178	if (c == ESCAPE && !isFollowingCharEscaped) {
	179	pTarget = target;
	180	c = unescape(buf, status);
	181
	182	if (c == U_ERR) {
	183	return TOK_ERROR;
	184	}
374ca955 A	185	if(c == CR \|\| c == LF){
	186	isNLUnescaped = TRUE;
	187	}
b75a7d8f A	188	}
	189
	190	if(c==ESCAPE && !isFollowingCharEscaped){
	191	isFollowingCharEscaped = TRUE;
	192	}else{
	193	U_APPEND_CHAR32(c, pTarget,len);
	194	pTarget = target;
	195	ustr_uscat(token, pTarget,len, status);
	196	isFollowingCharEscaped = FALSE;
	197	len=0;
374ca955 A	198	if(c == CR \|\| c == LF){
	199	if(isNLUnescaped == FALSE && prevC!=CR){
	200	lineCount++;
	201	}
	202	isNLUnescaped = FALSE;
	203	}
b75a7d8f A	204	}
	205
	206	if (U_FAILURE(*status)) {
	207	return TOK_ERROR;
	208	}
374ca955	209	prevC = c;
b75a7d8f A	210	}
	211	} else {
	212	if (token->fLength > 0) {
	213	ustr_ucat(token, SPACE, status);
	214
	215	if (U_FAILURE(*status)) {
	216	return TOK_ERROR;
	217	}
	218	}
	219
	220	if(lastStringWasQuoted){
	221	if(getShowWarning()){
	222	warning(lineCount, "Mixing quoted and unquoted strings");
	223	}
	224	if(isStrict()){
	225	return TOK_ERROR;
	226	}
	227
	228	}
	229
	230	lastStringWasQuoted = FALSE;
	231
	232	/* if we reach here we are mixing
	233	* quoted and unquoted strings
	234	* warn in normal mode and error in
	235	* pedantic mode
	236	*/
	237
	238	if (c == ESCAPE) {
	239	pTarget = target;
	240	c = unescape(buf, status);
	241
	242	/* EOF reached */
	243	if (c == U_EOF) {
	244	return TOK_ERROR;
	245	}
	246	}
	247
	248	U_APPEND_CHAR32(c, pTarget,len);
	249	pTarget = target;
	250	ustr_uscat(token, pTarget,len, status);
	251	len=0;
374ca955	252
b75a7d8f A	253	if (U_FAILURE(*status)) {
	254	return TOK_ERROR;
	255	}
	256
	257	for (;;) {
	258	/* DON'T skip whitespace */
374ca955	259	c = getNextChar(buf, FALSE, NULL, status);
b75a7d8f A	260
	261	/* EOF reached */
	262	if (c == U_EOF) {
	263	ucbuf_ungetc(c, buf);
	264	return TOK_STRING;
	265	}
	266
	267	if (U_FAILURE(*status)) {
	268	return TOK_STRING;
	269	}
	270
	271	if (c == QUOTE
	272	\|\| c == OPENBRACE
	273	\|\| c == CLOSEBRACE
	274	\|\| c == COMMA
	275	\|\| c == COLON) {
	276	ucbuf_ungetc(c, buf);
	277	break;
	278	}
	279
	280	if (isWhitespace(c)) {
	281	break;
	282	}
	283
	284	if (c == ESCAPE) {
	285	pTarget = target;
	286	c = unescape(buf, status);
	287
	288	if (c == U_ERR) {
	289	return TOK_ERROR;
	290	}
	291	}
	292
	293	U_APPEND_CHAR32(c, pTarget,len);
	294	pTarget = target;
	295	ustr_uscat(token, pTarget,len, status);
	296	len=0;
	297	if (U_FAILURE(*status)) {
	298	return TOK_ERROR;
	299	}
	300	}
	301	}
	302
	303	/* DO skip whitespace */
374ca955	304	c = getNextChar(buf, TRUE, NULL, status);
b75a7d8f A	305
	306	if (U_FAILURE(*status)) {
	307	return TOK_STRING;
	308	}
	309
	310	if (c == OPENBRACE \|\| c == CLOSEBRACE \|\| c == COMMA \|\| c == COLON) {
	311	ucbuf_ungetc(c, buf);
	312	return TOK_STRING;
	313	}
	314	}
	315	}
	316
374ca955	317	/* Retrieve the next character. If skipwhite is
b75a7d8f A	318	true, whitespace is skipped as well. */
	319	static UChar32 getNextChar(UCHARBUF* buf,
	320	UBool skipwhite,
374ca955	321	struct UString *token,
b75a7d8f	322	UErrorCode *status) {
374ca955	323	UChar32 c, c2;
b75a7d8f A	324
	325	if (U_FAILURE(*status)) {
	326	return U_EOF;
	327	}
	328
	329	for (;;) {
	330	c = ucbuf_getc(buf,status);
	331
	332	if (c == U_EOF) {
	333	return U_EOF;
	334	}
	335
	336	if (skipwhite && isWhitespace(c)) {
	337	continue;
	338	}
	339
	340	/* This also handles the get() failing case */
	341	if (c != SLASH) {
	342	return c;
	343	}
	344
46f4442e	345	c = ucbuf_getc(buf,status); /* "/c" */
b75a7d8f A	346
	347	if (c == U_EOF) {
	348	return U_EOF;
	349	}
	350
	351	switch (c) {
46f4442e	352	case SLASH: /* "//" */
374ca955	353	seekUntilNewline(buf, NULL, status);
b75a7d8f A	354	break;
b75a7d8f A	355
729e4ab9 A	356	case ASTERISK: /* " / * " */
	357	c2 = ucbuf_getc(buf, status); /* "/ * c" */
	358	if(c2 == ASTERISK){ /* "/ * " /
374ca955 A	359	/* parse multi-line comment and store it in token*/
374ca955 A	360	seekUntilEndOfComment(buf, token, status);
46f4442e	361	} else {
729e4ab9	362	ucbuf_ungetc(c2, buf); /* c2 is the non-asterisk following "/ ". Include c2 back in buffer. /
374ca955 A	363	seekUntilEndOfComment(buf, NULL, status);
374ca955 A	364	}
b75a7d8f A	365	break;
	366
	367	default:
46f4442e	368	ucbuf_ungetc(c, buf); /* "/c" - put back the c */
b75a7d8f A	369	/* If get() failed this is a NOP */
	370	return SLASH;
	371	}
374ca955	372
b75a7d8f A	373	}
	374	}
	375
	376	static void seekUntilNewline(UCHARBUF* buf,
374ca955	377	struct UString *token,
b75a7d8f A	378	UErrorCode *status) {
	379	UChar32 c;
	380
	381	if (U_FAILURE(*status)) {
	382	return;
	383	}
	384
	385	do {
	386	c = ucbuf_getc(buf,status);
374ca955 A	387	/* add the char to token */
	388	if(token!=NULL){
	389	ustr_u32cat(token, c, status);
	390	}
b75a7d8f A	391	} while (!isNewline(c) && c != U_EOF && *status == U_ZERO_ERROR);
	392	}
	393
	394	static void seekUntilEndOfComment(UCHARBUF *buf,
374ca955	395	struct UString *token,
b75a7d8f A	396	UErrorCode *status) {
	397	UChar32 c, d;
	398	uint32_t line;
	399
	400	if (U_FAILURE(*status)) {
	401	return;
	402	}
	403
	404	line = lineCount;
	405
	406	do {
	407	c = ucbuf_getc(buf, status);
	408
	409	if (c == ASTERISK) {
	410	d = ucbuf_getc(buf, status);
	411
	412	if (d != SLASH) {
	413	ucbuf_ungetc(d, buf);
	414	} else {
	415	break;
	416	}
	417	}
374ca955 A	418	/* add the char to token */
	419	if(token!=NULL){
	420	ustr_u32cat(token, c, status);
	421	}
	422	/* increment the lineCount */
	423	isNewline(c);
	424
b75a7d8f A	425	} while (c != U_EOF && *status == U_ZERO_ERROR);
	426
	427	if (c == U_EOF) {
	428	*status = U_INVALID_FORMAT_ERROR;
	429	error(line, "unterminated comment detected");
	430	}
	431	}
	432
4388f060	433	U_CFUNC UChar32 unescape(UCHARBUF buf, UErrorCode status) {
b75a7d8f A	434	if (U_FAILURE(*status)) {
	435	return U_EOF;
	436	}
	437
	438	/* We expect to be called after the ESCAPE has been seen, but
	439	* u_fgetcx needs an ESCAPE to do its magic. */
	440	ucbuf_ungetc(ESCAPE, buf);
	441
	442	return ucbuf_getcx32(buf, status);
	443	}
	444
	445	static UBool isWhitespace(UChar32 c) {
	446	switch (c) {
	447	/* ' ', '\t', '\n', '\r', 0x2029, 0xFEFF */
	448	case 0x000A:
	449	case 0x2029:
	450	lineCount++;
	451	case 0x000D:
	452	case 0x0020:
	453	case 0x0009:
	454	case 0xFEFF:
	455	return TRUE;
	456
	457	default:
	458	return FALSE;
	459	}
	460	}
	461
	462	static UBool isNewline(UChar32 c) {
	463	switch (c) {
	464	/* '\n', '\r', 0x2029 */
	465	case 0x000A:
	466	case 0x2029:
	467	lineCount++;
	468	case 0x000D:
	469	return TRUE;
	470
	471	default:
	472	return FALSE;
	473	}
	474	}