/*
*******************************************************************************
*
-* Copyright (C) 1998-2003, International Business Machines
+* Copyright (C) 1998-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
#include "read.h"
#include "errmsg.h"
#include "unicode/ustring.h"
+#include "unicode/utf16.h"
#define OPENBRACE 0x007B
#define CLOSEBRACE 0x007D
#define SPACE 0x0020
#define COLON 0x003A
#define BADBOM 0xFFFE
-
+#define CR 0x000D
+#define LF 0x000A
+
static int32_t lineCount;
/* Protos */
struct UString *token,
UErrorCode *status);
-static UChar32 getNextChar (UCHARBUF *buf, UBool skipwhite, UErrorCode *status);
-static void seekUntilNewline (UCHARBUF *buf, UErrorCode *status);
-static void seekUntilEndOfComment (UCHARBUF *buf, UErrorCode *status);
+static UChar32 getNextChar (UCHARBUF *buf, UBool skipwhite, struct UString *token, UErrorCode *status);
+static void seekUntilNewline (UCHARBUF *buf, struct UString *token, UErrorCode *status);
+static void seekUntilEndOfComment (UCHARBUF *buf, struct UString *token, UErrorCode *status);
static UBool isWhitespace (UChar32 c);
static UBool isNewline (UChar32 c);
-void resetLineNumber() {
+U_CFUNC void resetLineNumber() { /* Reset the file-static line counter. */
lineCount = 1; /* counting restarts at 1. NOTE(review): presumably called before each new input is tokenized - confirm against callers */
}
never return eString twice in a row; instead, multiple adjacent
string tokens will be merged into one, with no intervening
space. */
-enum ETokenType getNextToken(UCHARBUF* buf,
- struct UString *token,
- uint32_t *linenumber, /* out: linenumber of token */
- UErrorCode *status) {
+U_CFUNC enum ETokenType
+getNextToken(UCHARBUF* buf,
+ struct UString *token,
+ uint32_t *linenumber, /* out: linenumber of token */
+ struct UString *comment,
+ UErrorCode *status) {
enum ETokenType result;
UChar32 c;
}
/* Skip whitespace */
- c = getNextChar(buf, TRUE, status);
+ c = getNextChar(buf, TRUE, comment, status);
if (U_FAILURE(*status)) {
return TOK_ERROR;
UChar *pTarget = target;
int len=0;
UBool isFollowingCharEscaped=FALSE;
+ UBool isNLUnescaped = FALSE;
+ UChar32 prevC=0;
/* We are guaranteed on entry that initialChar is not a whitespace
character. If we are at the EOF, or have some other problem, it
if (c == U_ERR) {
return TOK_ERROR;
}
+ if(c == CR || c == LF){
+ isNLUnescaped = TRUE;
+ }
}
if(c==ESCAPE && !isFollowingCharEscaped){
ustr_uscat(token, pTarget,len, status);
isFollowingCharEscaped = FALSE;
len=0;
+ if(c == CR || c == LF){
+ if(isNLUnescaped == FALSE && prevC!=CR){
+ lineCount++;
+ }
+ isNLUnescaped = FALSE;
+ }
}
if (U_FAILURE(*status)) {
return TOK_ERROR;
}
+ prevC = c;
}
} else {
if (token->fLength > 0) {
pTarget = target;
ustr_uscat(token, pTarget,len, status);
len=0;
-
+
if (U_FAILURE(*status)) {
return TOK_ERROR;
}
for (;;) {
/* DON'T skip whitespace */
- c = getNextChar(buf, FALSE, status);
+ c = getNextChar(buf, FALSE, NULL, status);
/* EOF reached */
if (c == U_EOF) {
}
/* DO skip whitespace */
- c = getNextChar(buf, TRUE, status);
+ c = getNextChar(buf, TRUE, NULL, status);
if (U_FAILURE(*status)) {
return TOK_STRING;
}
}
-/* Retrieve the next character, ignoring comments. If skipwhite is
+/* Retrieve the next character. If skipwhite is
true, whitespace is skipped as well. */
static UChar32 getNextChar(UCHARBUF* buf,
UBool skipwhite,
+ struct UString *token,
UErrorCode *status) {
- UChar32 c;
+ UChar32 c, c2;
if (U_FAILURE(*status)) {
return U_EOF;
return c;
}
- c = ucbuf_getc(buf,status);
+ c = ucbuf_getc(buf,status); /* "/c" */
if (c == U_EOF) {
return U_EOF;
}
switch (c) {
- case SLASH:
- seekUntilNewline(buf, status);
+ case SLASH: /* "//" */
+ seekUntilNewline(buf, NULL, status);
break;
- case ASTERISK:
- seekUntilEndOfComment(buf, status);
+ case ASTERISK: /* " / * " */
+ c2 = ucbuf_getc(buf, status); /* "/ * c" */
+ if(c2 == ASTERISK){ /* "/ * *" */
+ /* parse the multi-line comment and store its text in token */
+ seekUntilEndOfComment(buf, token, status);
+ } else {
+ ucbuf_ungetc(c2, buf); /* c2 is the non-asterisk following "/ *". Include c2 back in buffer. */
+ seekUntilEndOfComment(buf, NULL, status);
+ }
break;
default:
- ucbuf_ungetc(c, buf);
+ ucbuf_ungetc(c, buf); /* "/c" - put back the c */
/* If get() failed this is a NOP */
return SLASH;
}
+
}
}
static void seekUntilNewline(UCHARBUF* buf,
+ struct UString *token,
UErrorCode *status) { /* Consume characters from buf until isNewline() accepts one, U_EOF is read, or *status changes. */
UChar32 c;
do {
c = ucbuf_getc(buf,status);
+ /* Append the character just read to token. A NULL token means the caller
+    does not want the consumed text collected.
+    NOTE(review): the append runs before the loop test below, so the
+    terminating character (the newline, or U_EOF) is handed to
+    ustr_u32cat() as well - confirm that this is intended. */
+ if(token!=NULL){
+ ustr_u32cat(token, c, status);
+ }
} while (!isNewline(c) && c != U_EOF && *status == U_ZERO_ERROR); /* stops on any status other than U_ZERO_ERROR */
}
static void seekUntilEndOfComment(UCHARBUF *buf,
+ struct UString *token,
UErrorCode *status) {
UChar32 c, d;
uint32_t line;
break;
}
}
+ /* add the char to token */
+ if(token!=NULL){
+ ustr_u32cat(token, c, status);
+ }
+ /* isNewline() is called here only for its side effect of incrementing lineCount; its return value is deliberately unused */
+ isNewline(c);
+
} while (c != U_EOF && *status == U_ZERO_ERROR);
if (c == U_EOF) {
}
}
-UChar32 unescape(UCHARBUF *buf,
- UErrorCode *status) {
+U_CFUNC UChar32 unescape(UCHARBUF *buf, UErrorCode *status) {
if (U_FAILURE(*status)) {
return U_EOF;
}