]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ruleiter.cpp
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / common / ruleiter.cpp
1 /*
2 **********************************************************************
3 * Copyright (c) 2003-2004, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Author: Alan Liu
7 * Created: September 24 2003
8 * Since: ICU 2.8
9 **********************************************************************
10 */
11 #include "ruleiter.h"
12 #include "unicode/parsepos.h"
13 #include "unicode/unistr.h"
14 #include "unicode/symtable.h"
15 #include "util.h"
16
17 U_NAMESPACE_BEGIN
18
19 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
20 ParsePosition& thePos) :
21 text(theText),
22 pos(thePos),
23 sym(theSym),
24 buf(0)
25 {}
26
27 UBool RuleCharacterIterator::atEnd() const {
28 return buf == 0 && pos.getIndex() == text.length();
29 }
30
31 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
32 if (U_FAILURE(ec)) return DONE;
33
34 UChar32 c = DONE;
35 isEscaped = FALSE;
36
37 for (;;) {
38 c = _current();
39 _advance(UTF_CHAR_LENGTH(c));
40
41 if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
42 (options & PARSE_VARIABLES) != 0 && sym != 0) {
43 UnicodeString name = sym->parseReference(text, pos, text.length());
44 // If name is empty there was an isolated SYMBOL_REF;
45 // return it. Caller must be prepared for this.
46 if (name.length() == 0) {
47 break;
48 }
49 bufPos = 0;
50 buf = sym->lookup(name);
51 if (buf == 0) {
52 ec = U_UNDEFINED_VARIABLE;
53 return DONE;
54 }
55 // Handle empty variable value
56 if (buf->length() == 0) {
57 buf = 0;
58 }
59 continue;
60 }
61
62 if ((options & SKIP_WHITESPACE) != 0 &&
63 uprv_isRuleWhiteSpace(c)) {
64 continue;
65 }
66
67 if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
68 UnicodeString s;
69 int32_t offset = 0;
70 c = lookahead(s).unescapeAt(offset);
71 jumpahead(offset);
72 isEscaped = TRUE;
73 if (c < 0) {
74 ec = U_MALFORMED_UNICODE_ESCAPE;
75 return DONE;
76 }
77 }
78
79 break;
80 }
81
82 return c;
83 }
84
85 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
86 p.buf = buf;
87 p.pos = pos.getIndex();
88 p.bufPos = bufPos;
89 }
90
91 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
92 buf = p.buf;
93 pos.setIndex(p.pos);
94 bufPos = p.bufPos;
95 }
96
97 void RuleCharacterIterator::skipIgnored(int32_t options) {
98 if ((options & SKIP_WHITESPACE) != 0) {
99 for (;;) {
100 UChar32 a = _current();
101 if (!uprv_isRuleWhiteSpace(a)) break;
102 _advance(UTF_CHAR_LENGTH(a));
103 }
104 }
105 }
106
107 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result) const {
108 if (buf != 0) {
109 buf->extract(bufPos, 0x7FFFFFFF, result);
110 } else {
111 text.extract(pos.getIndex(), 0x7FFFFFFF, result);
112 }
113 return result;
114 }
115
116 void RuleCharacterIterator::jumpahead(int32_t count) {
117 _advance(count);
118 }
119
120 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
121 int32_t b = pos.getIndex();
122 text.extract(0, b, result);
123 return result.append((UChar) 0x7C /*'|'*/).append(text, b, 0x7FFFFFFF);
124 }
125
126 UChar32 RuleCharacterIterator::_current() const {
127 if (buf != 0) {
128 return buf->char32At(bufPos);
129 } else {
130 int i = pos.getIndex();
131 return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
132 }
133 }
134
135 void RuleCharacterIterator::_advance(int32_t count) {
136 if (buf != 0) {
137 bufPos += count;
138 if (bufPos == buf->length()) {
139 buf = 0;
140 }
141 } else {
142 pos.setIndex(pos.getIndex() + count);
143 if (pos.getIndex() > text.length()) {
144 pos.setIndex(text.length());
145 }
146 }
147 }
148
149 U_NAMESPACE_END
150
151 //eof