2 *******************************************************************************
4 * Copyright (C) 1998-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 05/26/99 stephen Creation.
15 * 5/10/01 Ram removed ustdio dependency
16 *******************************************************************************
21 #include "unicode/ustring.h"
/* Code points of characters the tokenizer compares against:
 * 0x7B is the opening brace, 0x7D the closing brace, 0x2A the asterisk. */
23 #define OPENBRACE 0x007B
24 #define CLOSEBRACE 0x007D
29 #define ASTERISK 0x002A
/* File-scope line counter. Its value is copied into the caller-supplied
 * *linenumber by getNextToken and passed to warning() by getStringToken. */
36 static int32_t lineCount
;
/* Forward declarations of the file-local (static) helpers defined further
 * down in this file. */
39 static enum ETokenType
getStringToken(UCHARBUF
*buf
,
41 struct UString
*token
,
44 static UChar32
getNextChar (UCHARBUF
*buf
, UBool skipwhite
, struct UString
*token
, UErrorCode
*status
);
45 static void seekUntilNewline (UCHARBUF
*buf
, struct UString
*token
, UErrorCode
*status
);
46 static void seekUntilEndOfComment (UCHARBUF
*buf
, struct UString
*token
, UErrorCode
*status
);
47 static UBool
isWhitespace (UChar32 c
);
48 static UBool
isNewline (UChar32 c
);
/* Externally visible (U_CFUNC) function taking no arguments and returning
 * nothing.
 * NOTE(review): judging by the name it presumably resets the file-scope
 * lineCount counter -- confirm against the function body. */
50 U_CFUNC
void resetLineNumber() {
54 /* Read and return the next token from the stream. If the token is of
55 type eString, fill in the token parameter with the token. If the
56 token is eError, then the status parameter will contain the
57 specific error. This will be eItemNotFound at the end of file,
58 indicating that all tokens have been returned. This method will
59 never return eString twice in a row; instead, multiple adjacent
60 string tokens will be merged into one, with no intervening space. */
/* Tokenizer entry point.
 *
 * buf        - input character buffer the token is read from
 * token      - receives the text of a string token (filled by getStringToken)
 * linenumber - out: set from the file-scope lineCount
 * comment    - handed to getNextChar, which passes it on to
 *              seekUntilEndOfComment for block comments opened with a slash
 *              followed by two asterisks
 * status     - ICU error code; tested with U_FAILURE on entry and again
 *              after the first character has been read
 *
 * The visible return statements yield TOK_OPEN_BRACE and TOK_CLOSE_BRACE;
 * the result of getStringToken is kept in the local result. */
62 U_CFUNC
enum ETokenType
63 getNextToken(UCHARBUF
* buf
,
64 struct UString
*token
,
65 uint32_t *linenumber
, /* out: linenumber of token */
66 struct UString
*comment
,
68 enum ETokenType result
;
71 if (U_FAILURE(*status
)) {
/* Read the first character with skipwhite set to TRUE, so leading
 * whitespace is skipped. */
76 c
= getNextChar(buf
, TRUE
, comment
, status
);
78 if (U_FAILURE(*status
)) {
/* Hand the current line count back to the caller. */
82 *linenumber
= lineCount
;
88 return TOK_OPEN_BRACE
;
90 return TOK_CLOSE_BRACE
;
/* Build a string token; c is passed along as its first character. */
99 result
= getStringToken(buf
, c
, token
, status
);
/* Refresh the reported line count after the string token has been read. */
102 *linenumber
= lineCount
;
106 /* Copy a string token into the given UnicodeString. Upon entry, we
107 have already read the first character of the string token, which is
108 not a whitespace character (but may be a QUOTE or ESCAPE). This
109 function reads all subsequent characters that belong with this
110 string, and copy them into the token parameter. The other
111 important, and slightly convoluted purpose of this function is to
112 merge adjacent strings. It looks forward a bit, and if the next
113 non comment, non whitespace item is a string, it reads it in as
114 well. If two adjacent strings are quoted, they are merged without
115 intervening space. Otherwise a single SPACE character is inserted. */
/* Builds one string token in token, merging adjacent string pieces.
 *
 * buf    - input character buffer
 * token  - output string; emptied with ustr_setlen(token, 0, status) before
 *          any text is appended
 * status - ICU error code; tested with U_FAILURE after each step that can
 *          fail
 *
 * Text is appended to token either as a single SPACE (ustr_ucat) or as one
 * encoded code point at a time (U_APPEND_CHAR32 followed by ustr_uscat). */
117 static enum ETokenType
getStringToken(UCHARBUF
* buf
,
119 struct UString
*token
,
120 UErrorCode
*status
) {
/* Whether the most recently read piece was a quoted string. */
121 UBool lastStringWasQuoted
;
/* Scratch buffer: U_APPEND_CHAR32 writes the current code point through
 * pTarget, and ustr_uscat then appends len units from it to token. */
123 UChar target
[3] = { '\0' };
124 UChar
*pTarget
= target
;
/* Set to TRUE when an ESCAPE is seen that is not itself escaped; cleared
 * again after the next character has been appended. While it is TRUE the
 * QUOTE and ESCAPE tests below do not fire. */
126 UBool isFollowingCharEscaped
=FALSE
;
/* Set to TRUE when unescape() produced a CR or LF. */
127 UBool isNLUnescaped
= FALSE
;
130 /* We are guaranteed on entry that initialChar is not a whitespace
131 character. If we are at the EOF, or have some other problem, it
132 doesn't matter; we still want to validly return the initialChar
133 (if nothing else) as a string token. */
135 if (U_FAILURE(*status
)) {
140 lastStringWasQuoted
= FALSE
;
/* Start from an empty token. */
142 ustr_setlen(token
, 0, status
);
144 if (U_FAILURE(*status
)) {
/* A SPACE is appended first when the previous piece was not quoted and
 * token already holds text. */
150 if (!lastStringWasQuoted
&& token
->fLength
> 0) {
151 ustr_ucat(token
, SPACE
, status
);
153 if (U_FAILURE(*status
)) {
158 lastStringWasQuoted
= TRUE
;
/* Read the next character of the quoted text. */
161 c
= ucbuf_getc(buf
,status
);
168 /* Unterminated quoted strings */
169 if (U_FAILURE(*status
)) {
/* Test for a QUOTE that is not escaped (presumably the closing quote). */
173 if (c
== QUOTE
&& !isFollowingCharEscaped
) {
/* An ESCAPE that is not itself escaped is decoded through unescape(). */
177 if (c
== ESCAPE
&& !isFollowingCharEscaped
) {
179 c
= unescape(buf
, status
);
/* Remember that this CR or LF came out of an escape sequence. */
184 if(c
== CR
|| c
== LF
){
185 isNLUnescaped
= TRUE
;
189 if(c
==ESCAPE
&& !isFollowingCharEscaped
){
190 isFollowingCharEscaped
= TRUE
;
/* Encode c into the scratch buffer and append it to token. */
192 U_APPEND_CHAR32(c
, pTarget
,len
);
194 ustr_uscat(token
, pTarget
,len
, status
);
195 isFollowingCharEscaped
= FALSE
;
/* CR and LF get extra handling that depends on isNLUnescaped and on the
 * previous character prevC. */
197 if(c
== CR
|| c
== LF
){
198 if(isNLUnescaped
== FALSE
&& prevC
!=CR
){
201 isNLUnescaped
= FALSE
;
205 if (U_FAILURE(*status
)) {
/* A SPACE is appended when token already holds text. */
211 if (token
->fLength
> 0) {
212 ustr_ucat(token
, SPACE
, status
);
214 if (U_FAILURE(*status
)) {
/* The previous piece was quoted: emit the warning below, provided
 * getShowWarning() says warnings are enabled. */
219 if(lastStringWasQuoted
){
220 if(getShowWarning()){
221 warning(lineCount
, "Mixing quoted and unquoted strings");
229 lastStringWasQuoted
= FALSE
;
231 /* if we reach here we are mixing
232 * quoted and unquoted strings
233 * warn in normal mode and error in
239 c
= unescape(buf
, status
);
247 U_APPEND_CHAR32(c
, pTarget
,len
);
249 ustr_uscat(token
, pTarget
,len
, status
);
252 if (U_FAILURE(*status
)) {
257 /* DON'T skip whitespace */
258 c
= getNextChar(buf
, FALSE
, NULL
, status
);
/* Put c back into the buffer. */
262 ucbuf_ungetc(c
, buf
);
266 if (U_FAILURE(*status
)) {
275 ucbuf_ungetc(c
, buf
);
279 if (isWhitespace(c
)) {
285 c
= unescape(buf
, status
);
/* Encode c into the scratch buffer and append it to token. */
292 U_APPEND_CHAR32(c
, pTarget
,len
);
294 ustr_uscat(token
, pTarget
,len
, status
);
296 if (U_FAILURE(*status
)) {
302 /* DO skip whitespace */
303 c
= getNextChar(buf
, TRUE
, NULL
, status
);
305 if (U_FAILURE(*status
)) {
/* An opening brace, closing brace, COMMA or COLON is put back into the
 * buffer rather than consumed here. */
309 if (c
== OPENBRACE
|| c
== CLOSEBRACE
|| c
== COMMA
|| c
== COLON
) {
310 ucbuf_ungetc(c
, buf
);
316 /* Retrieve the next character. If skipwhite is
317 true, whitespace is skipped as well. */
/* Reads characters from buf and deals with comments on the way.
 *
 * buf       - input character buffer
 * skipwhite - when true, characters for which isWhitespace() holds are
 *             skipped (declared in the prototype; the parameter line itself
 *             is not present in this listing)
 * token     - passed to seekUntilEndOfComment only for a block comment
 *             opened with a slash followed by two asterisks; every other
 *             comment is consumed with NULL in its place
 * status    - ICU error code, tested with U_FAILURE on entry
 *
 * Comment kinds handled, going by the calls below: a slash followed by a
 * slash goes to seekUntilNewline; a slash followed by an asterisk goes to
 * seekUntilEndOfComment. */
318 static UChar32
getNextChar(UCHARBUF
* buf
,
320 struct UString
*token
,
321 UErrorCode
*status
) {
324 if (U_FAILURE(*status
)) {
/* Read one character. */
329 c
= ucbuf_getc(buf
,status
);
/* Whitespace is tested for only when the caller asked for skipping. */
335 if (skipwhite
&& isWhitespace(c
)) {
339 /* This also handles the get() failing case */
/* Read the character after the slash; it selects the comment kind. */
344 c
= ucbuf_getc(buf
,status
); /* "/c" */
351 case SLASH
: /* "//" */
352 seekUntilNewline(buf
, NULL
, status
);
355 case ASTERISK
: /* " / * " */
/* One more character tells the two block comment forms apart. */
356 c2
= ucbuf_getc(buf
, status
); /* "/ * c" */
357 if(c2
== ASTERISK
){ /* "/ * *" */
358 /* parse multi-line comment and store it in token*/
359 seekUntilEndOfComment(buf
, token
, status
);
361 ucbuf_ungetc(c2
, buf
); /* c2 is the non-asterisk following "/ *". Include c2 back in buffer. */
362 seekUntilEndOfComment(buf
, NULL
, status
);
367 ucbuf_ungetc(c
, buf
); /* "/c" - put back the c */
368 /* If get() failed this is a NOP */
/* Consumes characters from buf up to and including the first one for which
 * isNewline() holds. The loop also ends when U_EOF is read or when *status
 * is no longer U_ZERO_ERROR.
 *
 * buf    - input character buffer
 * token  - string the consumed characters are appended to (ustr_u32cat)
 * status - ICU error code, tested with U_FAILURE on entry
 *
 * NOTE(review): the call in getNextChar passes NULL for token; whether the
 * ustr_u32cat call is guarded against that is not visible here -- confirm. */
375 static void seekUntilNewline(UCHARBUF
* buf
,
376 struct UString
*token
,
377 UErrorCode
*status
) {
380 if (U_FAILURE(*status
)) {
385 c
= ucbuf_getc(buf
,status
);
386 /* add the char to token */
388 ustr_u32cat(token
, c
, status
);
/* The test runs after the read, so the newline itself is consumed. */
390 } while (!isNewline(c
) && c
!= U_EOF
&& *status
== U_ZERO_ERROR
);
/* Consumes the body of a block comment from buf.
 *
 * buf    - input character buffer
 * token  - string the comment characters are appended to (ustr_u32cat)
 * status - ICU error code, tested with U_FAILURE on entry; set to
 *          U_INVALID_FORMAT_ERROR on the unterminated-comment path
 *
 * The loop keeps reading until U_EOF is read or *status is no longer
 * U_ZERO_ERROR.
 * NOTE(review): getNextChar also calls this with NULL for token; whether the
 * ustr_u32cat call is guarded against that is not visible here -- confirm. */
393 static void seekUntilEndOfComment(UCHARBUF
*buf
,
394 struct UString
*token
,
395 UErrorCode
*status
) {
399 if (U_FAILURE(*status
)) {
406 c
= ucbuf_getc(buf
, status
);
/* Read a lookahead character d; the call after it pushes d back into the
 * buffer. */
409 d
= ucbuf_getc(buf
, status
);
412 ucbuf_ungetc(d
, buf
);
417 /* add the char to token */
419 ustr_u32cat(token
, c
, status
);
421 /* increment the lineCount */
424 } while (c
!= U_EOF
&& *status
== U_ZERO_ERROR
);
/* Unterminated comment: flag the format error and report it for line.
 * NOTE(review): presumably reached only when the loop ended without
 * finding the comment terminator -- confirm. */
427 *status
= U_INVALID_FORMAT_ERROR
;
428 error(line
, "unterminated comment detected");
/* Decodes an escape sequence whose leading ESCAPE character has already
 * been consumed by the caller.
 *
 * buf    - input character buffer; ESCAPE is pushed back into it first
 * status - ICU error code, tested with U_FAILURE on entry and passed on to
 *          ucbuf_getcx32
 *
 * Returns whatever ucbuf_getcx32 returns. The note further down names
 * u_fgetcx, but the call actually made here is ucbuf_getcx32. */
432 U_CFUNC UChar32
unescape(UCHARBUF
*buf
, UErrorCode
*status
) {
433 if (U_FAILURE(*status
)) {
437 /* We expect to be called after the ESCAPE has been seen, but
438 * u_fgetcx needs an ESCAPE to do its magic. */
439 ucbuf_ungetc(ESCAPE
, buf
);
441 return ucbuf_getcx32(buf
, status
);
/* Predicate on a single code point c, returning a UBool. The note inside
 * the function lists the characters it concerns: space, tab, line feed,
 * carriage return, 0x2029 and 0xFEFF. */
444 static UBool
isWhitespace(UChar32 c
) {
446 /* ' ', '\t', '\n', '\r', 0x2029, 0xFEFF */
/* Predicate on a single code point c, returning a UBool. The note inside
 * the function lists the characters it concerns: line feed, carriage
 * return and 0x2029. */
461 static UBool
isNewline(UChar32 c
) {
463 /* '\n', '\r', 0x2029 */