2 **********************************************************************
3 * Copyright (c) 2003-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
7 * Created: September 24 2003
9 **********************************************************************
12 #include "unicode/parsepos.h"
13 #include "unicode/symtable.h"
14 #include "unicode/unistr.h"
15 #include "unicode/utf16.h"
16 #include "patternprops.h"
// Maximum number of characters that next() looks ahead when decoding a
// backslash escape. The longest notation listed in the comment below
// (a pair of four-digit escapes) is 12 characters long.
18 /* \U87654321 or \ud800\udc00 */
19 #define MAX_U_NOTATION_LEN 12
// Constructs an iterator from the text to scan (theText), a symbol table
// (theSym) and a parse position (thePos).
// NOTE(review): the member-initializer list that follows the ':' is not
// part of this excerpt; presumably the arguments are bound to the text, sym
// and pos members that the other methods read - confirm against the full
// file. next() tests sym != 0 before using it, which suggests theSym may be
// null.
23 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString
& theText
, const SymbolTable
* theSym
,
24 ParsePosition
& thePos
) :
// Reports whether iteration is finished: true only when no variable value
// buffer is active (buf == 0) and the parse position index equals the
// length of the text.
32 UBool
RuleCharacterIterator::atEnd() const {
33 return buf
== 0 && pos
.getIndex() == text
.length();
// Reads the next code point, honoring the option flags tested below
// (PARSE_VARIABLES, SKIP_WHITESPACE, PARSE_ESCAPES).
//   options   - bit mask of the flags above.
//   isEscaped - output flag. NOTE(review): the assignments to it are not
//               visible in this excerpt; presumably it reports whether the
//               result came from a backslash escape - confirm.
//   ec        - in/out error code; on entry a failure value makes the
//               function return DONE at once.
// NOTE(review): the enclosing loop, several branch bodies and the final
// return are missing from this excerpt.
36 UChar32
RuleCharacterIterator::next(int32_t options
, UBool
& isEscaped
, UErrorCode
& ec
) {
37 if (U_FAILURE(ec
)) return DONE
;
// Step past the code point just read; U16_LENGTH(c) supplies the count
// passed to _advance().
44 _advance(U16_LENGTH(c
));
// A variable reference is only parsed when we are not already reading from
// a variable value (buf == 0), variable parsing was requested, and a symbol
// table is available.
46 if (c
== SymbolTable::SYMBOL_REF
&& buf
== 0 &&
47 (options
& PARSE_VARIABLES
) != 0 && sym
!= 0) {
// Let the symbol table parse the variable name out of the text at pos.
48 UnicodeString name
= sym
->parseReference(text
, pos
, text
.length());
49 // If name is empty there was an isolated SYMBOL_REF;
50 // return it. Caller must be prepared for this.
51 if (name
.length() == 0) {
// Switch the read source to the value the symbol table has for this name.
55 buf
= sym
->lookup(name
);
// NOTE(review): the guarding condition is not visible here; presumably this
// is the path where lookup() found no value - confirm.
57 ec
= U_UNDEFINED_VARIABLE
;
60 // Handle empty variable value
61 if (buf
->length() == 0) {
// Whitespace test applied when SKIP_WHITESPACE is set; the action taken on
// a match is not visible in this excerpt.
67 if ((options
& SKIP_WHITESPACE
) != 0 && PatternProps::isWhiteSpace(c
)) {
// Backslash (0x5C) starts an escape sequence when PARSE_ESCAPES is set.
71 if (c
== 0x5C /*'\\'*/ && (options
& PARSE_ESCAPES
) != 0) {
72 UnicodeString tempEscape
;
// Copy up to MAX_U_NOTATION_LEN upcoming characters into tempEscape and
// decode the escape from that copy; offset is declared outside this excerpt.
74 c
= lookahead(tempEscape
, MAX_U_NOTATION_LEN
).unescapeAt(offset
);
// NOTE(review): the guarding condition is not visible here; presumably this
// is the path where the escape could not be decoded - confirm.
78 ec
= U_MALFORMED_UNICODE_ESCAPE
;
// Stores the iterator's current position into p. The visible statement
// records the parse position index in p.pos.
// NOTE(review): other fields of p may be filled by statements that are not
// part of this excerpt.
89 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos
& p
) const {
91 p
.pos
= pos
.getIndex();
// Takes a Pos value by const reference.
// NOTE(review): the body is not part of this excerpt; presumably it restores
// the state that getPos() saved - confirm against the full file.
95 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos
& p
) {
// Advances past leading whitespace when options has SKIP_WHITESPACE set;
// does nothing visible otherwise.
// NOTE(review): the loop header enclosing the statements below is missing
// from this excerpt; the 'break' implies one exists.
101 void RuleCharacterIterator::skipIgnored(int32_t options
) {
102 if ((options
& SKIP_WHITESPACE
) != 0) {
// Peek at the current code point without consuming it.
104 UChar32 a
= _current();
// Stop at the first code point that is not pattern whitespace.
105 if (!PatternProps::isWhiteSpace(a
)) break;
106 _advance(U16_LENGTH(a
));
// Copies upcoming characters into result without moving the iterator
// (the method is const).
//   result       - receives the extracted characters.
//   maxLookAhead - maximum number of characters to copy; a negative value
//                  is replaced by 0x7FFFFFFF.
// NOTE(review): the condition that selects between the two extract calls
// and the return statement are not part of this excerpt; presumably buf is
// used when it is non-null and result is returned - confirm.
111 UnicodeString
& RuleCharacterIterator::lookahead(UnicodeString
& result
, int32_t maxLookAhead
) const {
112 if (maxLookAhead
< 0) {
113 maxLookAhead
= 0x7FFFFFFF;
// Source 1: the variable value buffer, starting at bufPos.
116 buf
->extract(bufPos
, maxLookAhead
, result
);
// Source 2: the main text, starting at the parse position index.
118 text
.extract(pos
.getIndex(), maxLookAhead
, result
);
// Takes a count of positions to move forward.
// NOTE(review): the body is not part of this excerpt; presumably it forwards
// to _advance(count) - confirm against the full file.
123 void RuleCharacterIterator::jumpahead(int32_t count
) {
// Builds a printable form of the iterator in result: the text with a '|'
// (0x7C) inserted at the current parse position index. Only text and pos
// are consulted; buf is not reflected in the output.
//   result - receives the string; the same object is returned.
128 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
129 int32_t b = pos.getIndex();
// Everything before the parse position.
130 text.extract(0, b, result);
// The marker, then the rest of the text (0x7FFFFFFF is passed as the length).
131 return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
// Returns the code point at the current read position without advancing.
// NOTE(review): the condition that selects between the two returns is not
// part of this excerpt; presumably the buffer branch is taken when buf is
// non-null - confirm.
135 UChar32
RuleCharacterIterator::_current() const {
// Reading from the variable value buffer.
137 return buf
->char32At(bufPos
);
// Reading from the main text; yields DONE when the index is at or past the
// end of the text.
139 int i
= pos
.getIndex();
140 return (i
< text
.length()) ? text
.char32At(i
) : (UChar32
)DONE
;
// Moves the read position forward by count.
// NOTE(review): the branch condition and the buffer-side update are not
// part of this excerpt; presumably bufPos is advanced when buf is active
// and the buffer is dropped once exhausted - confirm.
144 void RuleCharacterIterator::_advance(int32_t count
) {
// Test for having reached the end of the variable value buffer.
147 if (bufPos
== buf
->length()) {
// Main-text path: advance the parse position index by count.
151 pos
.setIndex(pos
.getIndex() + count
);
// Clamp so the index never exceeds the length of the text.
152 if (pos
.getIndex() > text
.length()) {
153 pos
.setIndex(text
.length());