2 *******************************************************************************
4 * Copyright (C) 1998-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 5/10/01 Ram removed ustdio dependency
16 *******************************************************************************
21 #include "unicode/ustring.h"
23 #define OPENBRACE 0x007B
24 #define CLOSEBRACE 0x007D
29 #define ASTERISK 0x002A
36 static int32_t lineCount
;
39 static enum ETokenType
getStringToken(UCHARBUF
*buf
,
41 struct UString
*token
,
44 static UChar32
getNextChar (UCHARBUF
*buf
, UBool skipwhite
, struct UString
*token
, UErrorCode
*status
);
45 static void seekUntilNewline (UCHARBUF
*buf
, struct UString
*token
, UErrorCode
*status
);
46 static void seekUntilEndOfComment (UCHARBUF
*buf
, struct UString
*token
, UErrorCode
*status
);
47 static UBool
isWhitespace (UChar32 c
);
48 static UBool
isNewline (UChar32 c
);
50 void resetLineNumber() {
54 /* Read and return the next token from the stream. If the token is of
55 type eString, fill in the token parameter with the token. If the
56 token is eError, then the status parameter will contain the
57 specific error. This will be eItemNotFound at the end of file,
58 indicating that all tokens have been returned. This method will
59 never return eString twice in a row; instead, multiple adjacent
60 string tokens will be merged into one, with no intervening
62 enum ETokenType
getNextToken(UCHARBUF
* buf
,
63 struct UString
*token
,
64 uint32_t *linenumber
, /* out: linenumber of token */
65 struct UString
*comment
,
67 enum ETokenType result
;
70 if (U_FAILURE(*status
)) {
75 c
= getNextChar(buf
, TRUE
, comment
, status
);
77 if (U_FAILURE(*status
)) {
81 *linenumber
= lineCount
;
87 return TOK_OPEN_BRACE
;
89 return TOK_CLOSE_BRACE
;
98 result
= getStringToken(buf
, c
, token
, status
);
101 *linenumber
= lineCount
;
105 /* Copy a string token into the given struct UString. Upon entry, we
106 have already read the first character of the string token, which is
107 not a whitespace character (but may be a QUOTE or ESCAPE). This
108 function reads all subsequent characters that belong with this
109 string, and copies them into the token parameter. The other
110 important, and slightly convoluted, purpose of this function is to
111 merge adjacent strings. It looks forward a bit, and if the next
112 non-comment, non-whitespace item is a string, it reads it in as
113 well. If two adjacent strings are quoted, they are merged without
114 intervening space. Otherwise a single SPACE character is
116 static enum ETokenType
getStringToken(UCHARBUF
* buf
,
118 struct UString
*token
,
119 UErrorCode
*status
) {
120 UBool lastStringWasQuoted
;
122 UChar target
[3] = { '\0' };
123 UChar
*pTarget
= target
;
125 UBool isFollowingCharEscaped
=FALSE
;
126 UBool isNLUnescaped
= FALSE
;
129 /* We are guaranteed on entry that initialChar is not a whitespace
130 character. If we are at the EOF, or have some other problem, it
131 doesn't matter; we still want to validly return the initialChar
132 (if nothing else) as a string token. */
134 if (U_FAILURE(*status
)) {
139 lastStringWasQuoted
= FALSE
;
141 ustr_setlen(token
, 0, status
);
143 if (U_FAILURE(*status
)) {
149 if (!lastStringWasQuoted
&& token
->fLength
> 0) {
150 ustr_ucat(token
, SPACE
, status
);
152 if (U_FAILURE(*status
)) {
157 lastStringWasQuoted
= TRUE
;
160 c
= ucbuf_getc(buf
,status
);
167 /* Unterminated quoted strings */
168 if (U_FAILURE(*status
)) {
172 if (c
== QUOTE
&& !isFollowingCharEscaped
) {
176 if (c
== ESCAPE
&& !isFollowingCharEscaped
) {
178 c
= unescape(buf
, status
);
183 if(c
== CR
|| c
== LF
){
184 isNLUnescaped
= TRUE
;
188 if(c
==ESCAPE
&& !isFollowingCharEscaped
){
189 isFollowingCharEscaped
= TRUE
;
191 U_APPEND_CHAR32(c
, pTarget
,len
);
193 ustr_uscat(token
, pTarget
,len
, status
);
194 isFollowingCharEscaped
= FALSE
;
196 if(c
== CR
|| c
== LF
){
197 if(isNLUnescaped
== FALSE
&& prevC
!=CR
){
200 isNLUnescaped
= FALSE
;
204 if (U_FAILURE(*status
)) {
210 if (token
->fLength
> 0) {
211 ustr_ucat(token
, SPACE
, status
);
213 if (U_FAILURE(*status
)) {
218 if(lastStringWasQuoted
){
219 if(getShowWarning()){
220 warning(lineCount
, "Mixing quoted and unquoted strings");
228 lastStringWasQuoted
= FALSE
;
230 /* if we reach here we are mixing
231 * quoted and unquoted strings
232 * warn in normal mode and error in
238 c
= unescape(buf
, status
);
246 U_APPEND_CHAR32(c
, pTarget
,len
);
248 ustr_uscat(token
, pTarget
,len
, status
);
251 if (U_FAILURE(*status
)) {
256 /* DON'T skip whitespace */
257 c
= getNextChar(buf
, FALSE
, NULL
, status
);
261 ucbuf_ungetc(c
, buf
);
265 if (U_FAILURE(*status
)) {
274 ucbuf_ungetc(c
, buf
);
278 if (isWhitespace(c
)) {
284 c
= unescape(buf
, status
);
291 U_APPEND_CHAR32(c
, pTarget
,len
);
293 ustr_uscat(token
, pTarget
,len
, status
);
295 if (U_FAILURE(*status
)) {
301 /* DO skip whitespace */
302 c
= getNextChar(buf
, TRUE
, NULL
, status
);
304 if (U_FAILURE(*status
)) {
308 if (c
== OPENBRACE
|| c
== CLOSEBRACE
|| c
== COMMA
|| c
== COLON
) {
309 ucbuf_ungetc(c
, buf
);
315 /* Retrieve the next character. If skipwhite is
316 true, whitespace is skipped as well. */
317 static UChar32
getNextChar(UCHARBUF
* buf
,
319 struct UString
*token
,
320 UErrorCode
*status
) {
323 if (U_FAILURE(*status
)) {
328 c
= ucbuf_getc(buf
,status
);
334 if (skipwhite
&& isWhitespace(c
)) {
338 /* This also handles the get() failing case */
343 c
= ucbuf_getc(buf
,status
); /* "/c" */
350 case SLASH
: /* "//" */
351 seekUntilNewline(buf
, NULL
, status
);
354 case ASTERISK
: /* "/*" */
355 c2
= ucbuf_getc(buf
, status
); /* "/*c" */
356 if(c2
== ASTERISK
){ /* "/**" */
357 /* parse multi-line comment and store it in token*/
358 seekUntilEndOfComment(buf
, token
, status
);
360 ucbuf_ungetc(c2
, buf
); /* c2 is the non-asterisk following "/*". Include c2 back in buffer. */
361 seekUntilEndOfComment(buf
, NULL
, status
);
366 ucbuf_ungetc(c
, buf
); /* "/c" - put back the c */
367 /* If get() failed this is a NOP */
374 static void seekUntilNewline(UCHARBUF
* buf
,
375 struct UString
*token
,
376 UErrorCode
*status
) {
379 if (U_FAILURE(*status
)) {
384 c
= ucbuf_getc(buf
,status
);
385 /* add the char to token */
387 ustr_u32cat(token
, c
, status
);
389 } while (!isNewline(c
) && c
!= U_EOF
&& *status
== U_ZERO_ERROR
);
392 static void seekUntilEndOfComment(UCHARBUF
*buf
,
393 struct UString
*token
,
394 UErrorCode
*status
) {
398 if (U_FAILURE(*status
)) {
405 c
= ucbuf_getc(buf
, status
);
408 d
= ucbuf_getc(buf
, status
);
411 ucbuf_ungetc(d
, buf
);
416 /* add the char to token */
418 ustr_u32cat(token
, c
, status
);
420 /* increment the lineCount */
423 } while (c
!= U_EOF
&& *status
== U_ZERO_ERROR
);
426 *status
= U_INVALID_FORMAT_ERROR
;
427 error(line
, "unterminated comment detected");
431 UChar32
unescape(UCHARBUF
*buf
,
432 UErrorCode
*status
) {
433 if (U_FAILURE(*status
)) {
437 /* We expect to be called after the ESCAPE has been seen, but
438 * ucbuf_getcx32 needs an ESCAPE to do its magic. */
439 ucbuf_ungetc(ESCAPE
, buf
);
441 return ucbuf_getcx32(buf
, status
);
444 static UBool
isWhitespace(UChar32 c
) {
446 /* ' ', '\t', '\n', '\r', 0x2029, 0xFEFF */
461 static UBool
isNewline(UChar32 c
) {
463 /* '\n', '\r', 0x2029 */