2 *******************************************************************************
4 * Copyright (C) 1998-2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 5/10/01 Ram removed ustdio dependency
16 *******************************************************************************
21 #include "unicode/ustring.h"
/* Unicode code points for punctuation characters, named by the character
   each one stands for. */
23 #define OPENBRACE 0x007B /* '{' */
24 #define CLOSEBRACE 0x007D /* '}' */
29 #define ASTERISK 0x002A /* '*' */
/* File-scope line counter. In the code visible here it is copied into
   the linenumber out-parameter of getNextToken and passed to warning()
   as the line argument. NOTE(review): where it is incremented or reset
   is not visible in this view. */
34 static int32_t lineCount
;
/* Forward declarations for the file-local (static) helpers that are
   defined further down in this file.
   NOTE(review): the getStringToken prototype is truncated in this view;
   its remaining parameters and closing parenthesis are not shown. */
37 static enum ETokenType
getStringToken(UCHARBUF
*buf
,
39 struct UString
*token
,
42 static UChar32
getNextChar (UCHARBUF
*buf
, UBool skipwhite
, UErrorCode
*status
);
43 static void seekUntilNewline (UCHARBUF
*buf
, UErrorCode
*status
);
44 static void seekUntilEndOfComment (UCHARBUF
*buf
, UErrorCode
*status
);
45 static UBool
isWhitespace (UChar32 c
);
46 static UBool
isNewline (UChar32 c
);
/* NOTE(review): the body of resetLineNumber is not visible in this view;
   presumably it resets lineCount to its starting value - confirm against
   the full file. */
48 void resetLineNumber() {
52 /* Read and return the next token from the stream. If the token is of
53 type eString, fill in the token parameter with the token. If the
54 token is eError, then the status parameter will contain the
55 specific error. This will be eItemNotFound at the end of file,
56 indicating that all tokens have been returned. This method will
57 never return eString twice in a row; instead, multiple adjacent
58 string tokens will be merged into one, with no intervening space. */
/* Tokenizer entry point: reads one character with getNextChar and turns it
   into a token type, storing the current lineCount in *linenumber.
   NOTE(review): several lines of this function (the status parameter, the
   declaration of c, the dispatch on c and the closing braces) are not
   visible in this view; the notes below cover only what is shown. */
60 enum ETokenType
getNextToken(UCHARBUF
* buf
,
61 struct UString
*token
,
62 uint32_t *linenumber
, /* out: linenumber of token */
64 enum ETokenType result
;
/* Guard on an already-failed status (the branch body is not visible). */
67 if (U_FAILURE(*status
)) {
/* Fetch the first character of the token; TRUE asks getNextChar to skip
   whitespace as well as comments. */
72 c
= getNextChar(buf
, TRUE
, status
);
74 if (U_FAILURE(*status
)) {
/* Report the line counter as it stands after that read. */
78 *linenumber
= lineCount
;
/* Single-character tokens; presumably selected by comparing c against
   OPENBRACE / CLOSEBRACE - the case labels are not visible here. */
84 return TOK_OPEN_BRACE
;
86 return TOK_CLOSE_BRACE
;
/* Hand the character already read to getStringToken, which fills token. */
95 result
= getStringToken(buf
, c
, token
, status
);
/* Refresh the reported line number after the string has been read. */
98 *linenumber
= lineCount
;
102 /* Copy a string token into the given UString. Upon entry, we
103 have already read the first character of the string token, which is
104 not a whitespace character (but may be a QUOTE or ESCAPE). This
105 function reads all subsequent characters that belong with this
106 string, and copies them into the token parameter. The other
107 important, and slightly convoluted purpose of this function is to
108 merge adjacent strings. It looks forward a bit, and if the next
109 non-comment, non-whitespace item is a string, it reads it in as
110 well. If two adjacent strings are quoted, they are merged without
111 intervening space. Otherwise a single SPACE character is inserted. */
/* Accumulates a string token into token, appending characters with
   ustr_uscat and separating pieces with SPACE via ustr_ucat.
   NOTE(review): this view is missing many lines of the function (the
   initial-character parameter that the comment below calls initialChar,
   the declarations of c and len, the loop headers, most branch bodies and
   the return statements), so the notes below describe only the statements
   that are shown and do not assert the overall control flow. */
113 static enum ETokenType
getStringToken(UCHARBUF
* buf
,
115 struct UString
*token
,
116 UErrorCode
*status
) {
117 UBool lastStringWasQuoted
;
/* Small scratch buffer; pTarget points at it and is what gets passed to
   U_APPEND_CHAR32 and ustr_uscat below. */
119 UChar target
[3] = { '\0' };
120 UChar
*pTarget
= target
;
/* Set when a bare ESCAPE has just been seen inside a quoted piece, so that
   the QUOTE / ESCAPE tests below are skipped for the following character. */
122 UBool isFollowingCharEscaped
=FALSE
;
124 /* We are guaranteed on entry that initialChar is not a whitespace
125 character. If we are at the EOF, or have some other problem, it
126 doesn't matter; we still want to validly return the initialChar
127 (if nothing else) as a string token. */
129 if (U_FAILURE(*status
)) {
134 lastStringWasQuoted
= FALSE
;
/* Reset the token length to 0 before accumulating into it. */
136 ustr_setlen(token
, 0, status
);
138 if (U_FAILURE(*status
)) {
/* Append a SPACE separator only when the previous piece was not quoted and
   the token already holds something. */
144 if (!lastStringWasQuoted
&& token
->fLength
> 0) {
145 ustr_ucat(token
, SPACE
, status
);
147 if (U_FAILURE(*status
)) {
152 lastStringWasQuoted
= TRUE
;
/* Raw read from the buffer (ucbuf_getc, not getNextChar). */
155 c
= ucbuf_getc(buf
,status
);
162 /* Unterminated quoted strings */
163 if (U_FAILURE(*status
)) {
/* An unescaped QUOTE; presumably this ends the quoted piece - the branch
   body is not visible here. */
167 if (c
== QUOTE
&& !isFollowingCharEscaped
) {
/* An unescaped ESCAPE is resolved through unescape(). */
171 if (c
== ESCAPE
&& !isFollowingCharEscaped
) {
173 c
= unescape(buf
, status
);
/* If c is still ESCAPE here, flag the next character as escaped. */
180 if(c
==ESCAPE
&& !isFollowingCharEscaped
){
181 isFollowingCharEscaped
= TRUE
;
/* U_APPEND_CHAR32 is defined elsewhere; presumably it stores c in the
   scratch buffer and sets len - confirm. The len units are then appended
   to the token. */
183 U_APPEND_CHAR32(c
, pTarget
,len
);
185 ustr_uscat(token
, pTarget
,len
, status
);
186 isFollowingCharEscaped
= FALSE
;
190 if (U_FAILURE(*status
)) {
/* Here a SPACE is appended whenever the token is non-empty, with no check
   of lastStringWasQuoted in the condition. */
195 if (token
->fLength
> 0) {
196 ustr_ucat(token
, SPACE
, status
);
198 if (U_FAILURE(*status
)) {
/* Emit the mixing warning only if the previous piece was quoted and
   getShowWarning() is true. */
203 if(lastStringWasQuoted
){
204 if(getShowWarning()){
205 warning(lineCount
, "Mixing quoted and unquoted strings");
213 lastStringWasQuoted
= FALSE
;
215 /* if we reach here we are mixing
216 * quoted and unquoted strings
217 * warn in normal mode and error in
223 c
= unescape(buf
, status
);
231 U_APPEND_CHAR32(c
, pTarget
,len
);
233 ustr_uscat(token
, pTarget
,len
, status
);
236 if (U_FAILURE(*status
)) {
241 /* DON'T skip whitespace */
242 c
= getNextChar(buf
, FALSE
, status
);
/* The character just read is pushed back onto the buffer; the condition
   guarding this call is not visible here. */
246 ucbuf_ungetc(c
, buf
);
250 if (U_FAILURE(*status
)) {
259 ucbuf_ungetc(c
, buf
);
263 if (isWhitespace(c
)) {
269 c
= unescape(buf
, status
);
/* Same append sequence as above: U_APPEND_CHAR32, then ustr_uscat. */
276 U_APPEND_CHAR32(c
, pTarget
,len
);
278 ustr_uscat(token
, pTarget
,len
, status
);
280 if (U_FAILURE(*status
)) {
286 /* DO skip whitespace */
287 c
= getNextChar(buf
, TRUE
, status
);
289 if (U_FAILURE(*status
)) {
/* A brace, comma or colon is not consumed here: it is pushed back onto the
   buffer. */
293 if (c
== OPENBRACE
|| c
== CLOSEBRACE
|| c
== COMMA
|| c
== COLON
) {
294 ucbuf_ungetc(c
, buf
);
300 /* Retrieve the next character, ignoring comments. If skipwhite is
301 true, whitespace is skipped as well.
   Characters are read with ucbuf_getc; comment bodies are consumed by
   seekUntilNewline and seekUntilEndOfComment.
   NOTE(review): the skipwhite parameter line, the declaration of c, the
   loop, the dispatch that selects between the two seek helpers and the
   return statements are not visible in this view. */
302 static UChar32
getNextChar(UCHARBUF
* buf
,
304 UErrorCode
*status
) {
307 if (U_FAILURE(*status
)) {
312 c
= ucbuf_getc(buf
,status
);
/* Whitespace is only filtered when the caller asked for it. */
318 if (skipwhite
&& isWhitespace(c
)) {
322 /* This also handles the get() failing case */
/* Second read; presumably the look-ahead that decides whether a comment
   starts here - the test on c is not visible. */
327 c
= ucbuf_getc(buf
,status
);
/* Discard input up to a newline (see seekUntilNewline). */
335 seekUntilNewline(buf
, status
);
/* Discard input up to the end of a comment (see seekUntilEndOfComment). */
339 seekUntilEndOfComment(buf
, status
);
/* Push the character held in c back onto the buffer. */
343 ucbuf_ungetc(c
, buf
);
344 /* If get() failed this is a NOP */
/* Reads characters from buf with ucbuf_getc and discards them, stopping
   once isNewline(c) is true, c is U_EOF, or *status is no longer
   U_ZERO_ERROR. The terminating character itself is consumed; no
   ucbuf_ungetc is visible here.
   NOTE(review): the declaration of c, the body of the U_FAILURE guard and
   the opening of the do-while loop are not visible in this view. */
350 static void seekUntilNewline(UCHARBUF
* buf
,
351 UErrorCode
*status
) {
354 if (U_FAILURE(*status
)) {
359 c
= ucbuf_getc(buf
,status
);
/* The loop compares against U_ZERO_ERROR exactly, so any other status
   value ends the scan. */
360 } while (!isNewline(c
) && c
!= U_EOF
&& *status
== U_ZERO_ERROR
);
/* Reads and discards characters from buf while looking for the end of a
   comment, reading c and then a following character d on each pass, until
   c is U_EOF or *status is no longer U_ZERO_ERROR. It can set *status to
   U_INVALID_FORMAT_ERROR and report "unterminated comment detected".
   NOTE(review): the declarations of c, d and line, the tests on c and d,
   and the condition guarding the error report are not visible in this
   view. */
363 static void seekUntilEndOfComment(UCHARBUF
*buf
,
364 UErrorCode
*status
) {
368 if (U_FAILURE(*status
)) {
375 c
= ucbuf_getc(buf
, status
);
/* Look one character ahead ... */
378 d
= ucbuf_getc(buf
, status
);
/* ... and push it back; presumably only when the pair c, d does not end
   the comment - the test is not visible. */
381 ucbuf_ungetc(d
, buf
);
386 } while (c
!= U_EOF
&& *status
== U_ZERO_ERROR
);
/* Failure path: flag the input as malformed and report it via error(). */
389 *status
= U_INVALID_FORMAT_ERROR
;
390 error(line
, "unterminated comment detected");
/* Resolves an escape sequence: pushes ESCAPE back onto buf and returns the
   result of ucbuf_getcx32(buf, status).
   NOTE(review): the body of the U_FAILURE guard and the closing brace are
   not visible in this view. */
394 UChar32
unescape(UCHARBUF
*buf
,
395 UErrorCode
*status
) {
396 if (U_FAILURE(*status
)) {
400 /* We expect to be called after the ESCAPE has been seen, but
401 * the escape-aware reader needs an ESCAPE to do its magic, so it is
   * pushed back first. The original note named u_fgetcx here; the call
   * below is ucbuf_getcx32. */
402 ucbuf_ungetc(ESCAPE
, buf
);
404 return ucbuf_getcx32(buf
, status
);
/* Whitespace test on the code point c.
   NOTE(review): the body is not visible in this view; the list below is
   the original note naming the characters that are meant. */
407 static UBool
isWhitespace(UChar32 c
) {
409 /* ' ', '\t', '\n', '\r', 0x2029, 0xFEFF */
/* Newline test on the code point c.
   NOTE(review): the body is not visible in this view; the list below is
   the original note naming the characters that are meant. */
424 static UBool
isNewline(UChar32 c
) {
426 /* '\n', '\r', 0x2029 */