2 **********************************************************************
3 * Copyright (c) 2003-2005, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
7 * Created: September 24 2003
9 **********************************************************************
12 #include "unicode/parsepos.h"
13 #include "unicode/unistr.h"
14 #include "unicode/symtable.h"
// Constructor. Takes the text (by const reference), a SymbolTable pointer and
// a ParsePosition reference.
// NOTE(review): the member-initializer list and body are not visible in this
// excerpt; presumably the arguments initialise the text / sym / pos members
// used by the other methods -- confirm against the full file.
19 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString
& theText
, const SymbolTable
* theSym
,
20 ParsePosition
& thePos
) :
27 UBool
RuleCharacterIterator::atEnd() const {
28 return buf
== 0 && pos
.getIndex() == text
.length();
// Reads the next character, with behaviour controlled by the PARSE_VARIABLES,
// SKIP_WHITESPACE and PARSE_ESCAPES bits of `options`.
//
// @param options   bit mask tested against PARSE_VARIABLES, SKIP_WHITESPACE
//                  and PARSE_ESCAPES.
// @param isEscaped output flag. NOTE(review): its assignments are not visible
//                  in this excerpt.
// @param ec        error code. If it already indicates failure on entry, DONE
//                  is returned immediately. U_UNDEFINED_VARIABLE and
//                  U_MALFORMED_UNICODE_ESCAPE are assigned on error paths
//                  whose guarding conditions are not visible in this excerpt.
// @return DONE when ec is already a failure; the remaining return paths are
//         not visible in this excerpt.
31 UChar32
RuleCharacterIterator::next(int32_t options
, UBool
& isEscaped
, UErrorCode
& ec
) {
32 if (U_FAILURE(ec
)) return DONE
;
// Step past c, advancing by the length UTF_CHAR_LENGTH reports for it.
39 _advance(UTF_CHAR_LENGTH(c
));
// Variable reference: taken only when c is SYMBOL_REF, no buffer is active
// (buf == 0), PARSE_VARIABLES is requested and a symbol table is present.
41 if (c
== SymbolTable::SYMBOL_REF
&& buf
== 0 &&
42 (options
& PARSE_VARIABLES
) != 0 && sym
!= 0) {
// Ask the symbol table to parse the reference name out of the text at pos.
43 UnicodeString name
= sym
->parseReference(text
, pos
, text
.length());
44 // If name is empty there was an isolated SYMBOL_REF;
45 // return it. Caller must be prepared for this.
46 if (name
.length() == 0) {
// Fetch the value the symbol table associates with the parsed name.
50 buf
= sym
->lookup(name
);
52 ec
= U_UNDEFINED_VARIABLE
;
55 // Handle empty variable value
56 if (buf
->length() == 0) {
// Rule-whitespace test, applied only when SKIP_WHITESPACE is set.
62 if ((options
& SKIP_WHITESPACE
) != 0 &&
63 uprv_isRuleWhiteSpace(c
)) {
// A backslash (0x5C) starts an escape when PARSE_ESCAPES is set.
67 if (c
== 0x5C /*'\\'*/ && (options
& PARSE_ESCAPES
) != 0) {
// Decode the escape from the text returned by lookahead().
70 c
= lookahead(s
).unescapeAt(offset
);
74 ec
= U_MALFORMED_UNICODE_ESCAPE
;
// Records iterator state into p. The only store visible in this excerpt is
// the current parse index (pos.getIndex()) being written to p.pos.
// NOTE(review): any further fields written to p are not visible here.
85 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos
& p
) const {
87 p
.pos
= pos
.getIndex();
// Takes a Pos by const reference.
// NOTE(review): the body is not visible in this excerpt; presumably it
// restores the state that getPos() recorded -- confirm against the full file.
91 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos
& p
) {
// Skips characters that `options` marks as ignorable. When SKIP_WHITESPACE is
// set, each current character that uprv_isRuleWhiteSpace() accepts is stepped
// over; the scan breaks at the first character that is not rule whitespace.
// NOTE(review): the enclosing loop header is not visible in this excerpt.
97 void RuleCharacterIterator::skipIgnored(int32_t options
) {
98 if ((options
& SKIP_WHITESPACE
) != 0) {
100 UChar32 a
= _current();
101 if (!uprv_isRuleWhiteSpace(a
)) break;
102 _advance(UTF_CHAR_LENGTH(a
));
// Extracts upcoming text into `result`: either from buf starting at bufPos,
// or from text starting at the current parse index. Both calls pass
// 0x7FFFFFFF as the length (presumably meaning "through the end" -- confirm
// against UnicodeString::extract).
// NOTE(review): the condition choosing between the two sources and the return
// statement are not visible in this excerpt; presumably buf is used when it is
// non-null and `result` is returned.
107 UnicodeString
& RuleCharacterIterator::lookahead(UnicodeString
& result
) const {
109 buf
->extract(bufPos
, 0x7FFFFFFF, result
);
111 text
.extract(pos
.getIndex(), 0x7FFFFFFF, result
);
// Takes a count of type int32_t.
// NOTE(review): the body is not visible in this excerpt; the name suggests it
// moves the iterator forward by `count` -- confirm against the full file.
116 void RuleCharacterIterator::jumpahead(int32_t count
) {
121 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
122 int32_t b = pos.getIndex();
123 text.extract(0, b, result);
124 return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
// Returns the code point at the current position; the visible statements
// only read state. One path reads buf at bufPos; the other reads text at the
// parse index and yields DONE when that index is not less than text.length().
// NOTE(review): the test selecting between buf and text is not visible in
// this excerpt; presumably buf is used when it is non-null.
128 UChar32
RuleCharacterIterator::_current() const {
130 return buf
->char32At(bufPos
);
132 int i
= pos
.getIndex();
133 return (i
< text
.length()) ? text
.char32At(i
) : (UChar32
)DONE
;
137 void RuleCharacterIterator::_advance(int32_t count
) {
140 if (bufPos
== buf
->length()) {
144 pos
.setIndex(pos
.getIndex() + count
);
145 if (pos
.getIndex() > text
.length()) {
146 pos
.setIndex(text
.length());