]>
Commit | Line | Data |
---|---|---|
374ca955 A |
1 | /* |
2 | ********************************************************************** | |
73c04bcf | 3 | * Copyright (c) 2003-2005, International Business Machines |
374ca955 A |
4 | * Corporation and others. All Rights Reserved. |
5 | ********************************************************************** | |
6 | * Author: Alan Liu | |
7 | * Created: September 24 2003 | |
8 | * Since: ICU 2.8 | |
9 | ********************************************************************** | |
10 | */ | |
11 | #include "ruleiter.h" | |
12 | #include "unicode/parsepos.h" | |
13 | #include "unicode/unistr.h" | |
14 | #include "unicode/symtable.h" | |
15 | #include "util.h" | |
16 | ||
17 | U_NAMESPACE_BEGIN | |
18 | ||
19 | RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, | |
20 | ParsePosition& thePos) : | |
21 | text(theText), | |
22 | pos(thePos), | |
23 | sym(theSym), | |
24 | buf(0) | |
25 | {} | |
26 | ||
27 | UBool RuleCharacterIterator::atEnd() const { | |
28 | return buf == 0 && pos.getIndex() == text.length(); | |
29 | } | |
30 | ||
31 | UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { | |
32 | if (U_FAILURE(ec)) return DONE; | |
33 | ||
34 | UChar32 c = DONE; | |
35 | isEscaped = FALSE; | |
36 | ||
37 | for (;;) { | |
38 | c = _current(); | |
39 | _advance(UTF_CHAR_LENGTH(c)); | |
40 | ||
41 | if (c == SymbolTable::SYMBOL_REF && buf == 0 && | |
42 | (options & PARSE_VARIABLES) != 0 && sym != 0) { | |
43 | UnicodeString name = sym->parseReference(text, pos, text.length()); | |
44 | // If name is empty there was an isolated SYMBOL_REF; | |
45 | // return it. Caller must be prepared for this. | |
46 | if (name.length() == 0) { | |
47 | break; | |
48 | } | |
49 | bufPos = 0; | |
50 | buf = sym->lookup(name); | |
51 | if (buf == 0) { | |
52 | ec = U_UNDEFINED_VARIABLE; | |
53 | return DONE; | |
54 | } | |
55 | // Handle empty variable value | |
56 | if (buf->length() == 0) { | |
57 | buf = 0; | |
58 | } | |
59 | continue; | |
60 | } | |
61 | ||
62 | if ((options & SKIP_WHITESPACE) != 0 && | |
63 | uprv_isRuleWhiteSpace(c)) { | |
64 | continue; | |
65 | } | |
66 | ||
67 | if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { | |
68 | UnicodeString s; | |
69 | int32_t offset = 0; | |
70 | c = lookahead(s).unescapeAt(offset); | |
71 | jumpahead(offset); | |
72 | isEscaped = TRUE; | |
73 | if (c < 0) { | |
74 | ec = U_MALFORMED_UNICODE_ESCAPE; | |
75 | return DONE; | |
76 | } | |
77 | } | |
78 | ||
79 | break; | |
80 | } | |
81 | ||
82 | return c; | |
83 | } | |
84 | ||
85 | void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { | |
86 | p.buf = buf; | |
87 | p.pos = pos.getIndex(); | |
88 | p.bufPos = bufPos; | |
89 | } | |
90 | ||
91 | void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { | |
92 | buf = p.buf; | |
93 | pos.setIndex(p.pos); | |
94 | bufPos = p.bufPos; | |
95 | } | |
96 | ||
97 | void RuleCharacterIterator::skipIgnored(int32_t options) { | |
98 | if ((options & SKIP_WHITESPACE) != 0) { | |
99 | for (;;) { | |
100 | UChar32 a = _current(); | |
101 | if (!uprv_isRuleWhiteSpace(a)) break; | |
102 | _advance(UTF_CHAR_LENGTH(a)); | |
103 | } | |
104 | } | |
105 | } | |
106 | ||
107 | UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result) const { | |
108 | if (buf != 0) { | |
109 | buf->extract(bufPos, 0x7FFFFFFF, result); | |
110 | } else { | |
111 | text.extract(pos.getIndex(), 0x7FFFFFFF, result); | |
112 | } | |
113 | return result; | |
114 | } | |
115 | ||
116 | void RuleCharacterIterator::jumpahead(int32_t count) { | |
117 | _advance(count); | |
118 | } | |
119 | ||
73c04bcf | 120 | /* |
374ca955 A |
121 | UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { |
122 | int32_t b = pos.getIndex(); | |
123 | text.extract(0, b, result); | |
73c04bcf | 124 | return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index |
374ca955 | 125 | } |
73c04bcf | 126 | */ |
374ca955 A |
127 | |
128 | UChar32 RuleCharacterIterator::_current() const { | |
129 | if (buf != 0) { | |
130 | return buf->char32At(bufPos); | |
131 | } else { | |
132 | int i = pos.getIndex(); | |
133 | return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; | |
134 | } | |
135 | } | |
136 | ||
137 | void RuleCharacterIterator::_advance(int32_t count) { | |
138 | if (buf != 0) { | |
139 | bufPos += count; | |
140 | if (bufPos == buf->length()) { | |
141 | buf = 0; | |
142 | } | |
143 | } else { | |
144 | pos.setIndex(pos.getIndex() + count); | |
145 | if (pos.getIndex() > text.length()) { | |
146 | pos.setIndex(text.length()); | |
147 | } | |
148 | } | |
149 | } | |
150 | ||
151 | U_NAMESPACE_END | |
152 | ||
153 | //eof |