]>
Commit | Line | Data |
---|---|---|
374ca955 A |
1 | /* |
2 | ********************************************************************** | |
4388f060 | 3 | * Copyright (c) 2003-2011, International Business Machines |
374ca955 A |
4 | * Corporation and others. All Rights Reserved. |
5 | ********************************************************************** | |
6 | * Author: Alan Liu | |
7 | * Created: September 24 2003 | |
8 | * Since: ICU 2.8 | |
9 | ********************************************************************** | |
10 | */ | |
11 | #include "ruleiter.h" | |
12 | #include "unicode/parsepos.h" | |
374ca955 | 13 | #include "unicode/symtable.h" |
4388f060 A |
14 | #include "unicode/unistr.h" |
15 | #include "unicode/utf16.h" | |
16 | #include "patternprops.h" | |
374ca955 | 17 | |
46f4442e A |
18 | /* Longest escape notation looked ahead for: \U87654321 (10 chars) or a surrogate pair \ud800\udc00 (12 chars) */ |
19 | #define MAX_U_NOTATION_LEN 12 | |
20 | ||
374ca955 A |
21 | U_NAMESPACE_BEGIN |
22 | ||
23 | RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, | |
24 | ParsePosition& thePos) : | |
25 | text(theText), | |
26 | pos(thePos), | |
27 | sym(theSym), | |
46f4442e A |
28 | buf(0), |
29 | bufPos(0) | |
374ca955 A |
30 | {} |
31 | ||
32 | UBool RuleCharacterIterator::atEnd() const { | |
33 | return buf == 0 && pos.getIndex() == text.length(); | |
34 | } | |
35 | ||
36 | UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { | |
37 | if (U_FAILURE(ec)) return DONE; | |
38 | ||
39 | UChar32 c = DONE; | |
40 | isEscaped = FALSE; | |
41 | ||
42 | for (;;) { | |
43 | c = _current(); | |
4388f060 | 44 | _advance(U16_LENGTH(c)); |
374ca955 A |
45 | |
46 | if (c == SymbolTable::SYMBOL_REF && buf == 0 && | |
47 | (options & PARSE_VARIABLES) != 0 && sym != 0) { | |
48 | UnicodeString name = sym->parseReference(text, pos, text.length()); | |
49 | // If name is empty there was an isolated SYMBOL_REF; | |
50 | // return it. Caller must be prepared for this. | |
51 | if (name.length() == 0) { | |
52 | break; | |
53 | } | |
54 | bufPos = 0; | |
55 | buf = sym->lookup(name); | |
56 | if (buf == 0) { | |
57 | ec = U_UNDEFINED_VARIABLE; | |
58 | return DONE; | |
59 | } | |
60 | // Handle empty variable value | |
61 | if (buf->length() == 0) { | |
62 | buf = 0; | |
63 | } | |
64 | continue; | |
65 | } | |
66 | ||
4388f060 | 67 | if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { |
374ca955 A |
68 | continue; |
69 | } | |
70 | ||
71 | if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { | |
46f4442e | 72 | UnicodeString tempEscape; |
374ca955 | 73 | int32_t offset = 0; |
46f4442e | 74 | c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); |
374ca955 A |
75 | jumpahead(offset); |
76 | isEscaped = TRUE; | |
77 | if (c < 0) { | |
78 | ec = U_MALFORMED_UNICODE_ESCAPE; | |
79 | return DONE; | |
80 | } | |
81 | } | |
82 | ||
83 | break; | |
84 | } | |
85 | ||
86 | return c; | |
87 | } | |
88 | ||
89 | void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { | |
90 | p.buf = buf; | |
91 | p.pos = pos.getIndex(); | |
92 | p.bufPos = bufPos; | |
93 | } | |
94 | ||
95 | void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { | |
96 | buf = p.buf; | |
97 | pos.setIndex(p.pos); | |
98 | bufPos = p.bufPos; | |
99 | } | |
100 | ||
101 | void RuleCharacterIterator::skipIgnored(int32_t options) { | |
102 | if ((options & SKIP_WHITESPACE) != 0) { | |
103 | for (;;) { | |
104 | UChar32 a = _current(); | |
4388f060 A |
105 | if (!PatternProps::isWhiteSpace(a)) break; |
106 | _advance(U16_LENGTH(a)); | |
374ca955 A |
107 | } |
108 | } | |
109 | } | |
110 | ||
46f4442e A |
111 | UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { |
112 | if (maxLookAhead < 0) { | |
113 | maxLookAhead = 0x7FFFFFFF; | |
114 | } | |
374ca955 | 115 | if (buf != 0) { |
46f4442e | 116 | buf->extract(bufPos, maxLookAhead, result); |
374ca955 | 117 | } else { |
46f4442e | 118 | text.extract(pos.getIndex(), maxLookAhead, result); |
374ca955 A |
119 | } |
120 | return result; | |
121 | } | |
122 | ||
123 | void RuleCharacterIterator::jumpahead(int32_t count) { | |
124 | _advance(count); | |
125 | } | |
126 | ||
73c04bcf | 127 | /* |
374ca955 A |
128 | UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { |
129 | int32_t b = pos.getIndex(); | |
130 | text.extract(0, b, result); | |
73c04bcf | 131 | return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index |
374ca955 | 132 | } |
73c04bcf | 133 | */ |
374ca955 A |
134 | |
135 | UChar32 RuleCharacterIterator::_current() const { | |
136 | if (buf != 0) { | |
137 | return buf->char32At(bufPos); | |
138 | } else { | |
139 | int i = pos.getIndex(); | |
140 | return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; | |
141 | } | |
142 | } | |
143 | ||
144 | void RuleCharacterIterator::_advance(int32_t count) { | |
145 | if (buf != 0) { | |
146 | bufPos += count; | |
147 | if (bufPos == buf->length()) { | |
148 | buf = 0; | |
149 | } | |
150 | } else { | |
151 | pos.setIndex(pos.getIndex() + count); | |
152 | if (pos.getIndex() > text.length()) { | |
153 | pos.setIndex(text.length()); | |
154 | } | |
155 | } | |
156 | } | |
157 | ||
158 | U_NAMESPACE_END | |
159 | ||
160 | //eof |