]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
374ca955 A |
3 | /* |
4 | ********************************************************************** | |
4388f060 | 5 | * Copyright (c) 2003-2011, International Business Machines |
374ca955 A |
6 | * Corporation and others. All Rights Reserved. |
7 | ********************************************************************** | |
8 | * Author: Alan Liu | |
9 | * Created: September 24 2003 | |
10 | * Since: ICU 2.8 | |
11 | ********************************************************************** | |
12 | */ | |
13 | #include "ruleiter.h" | |
14 | #include "unicode/parsepos.h" | |
374ca955 | 15 | #include "unicode/symtable.h" |
4388f060 A |
16 | #include "unicode/unistr.h" |
17 | #include "unicode/utf16.h" | |
18 | #include "patternprops.h" | |
374ca955 | 19 | |
46f4442e A |
20 | /* \U87654321 or \ud800\udc00 */ |
21 | #define MAX_U_NOTATION_LEN 12 | |
22 | ||
374ca955 A |
23 | U_NAMESPACE_BEGIN |
24 | ||
25 | RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, | |
26 | ParsePosition& thePos) : | |
27 | text(theText), | |
28 | pos(thePos), | |
29 | sym(theSym), | |
46f4442e A |
30 | buf(0), |
31 | bufPos(0) | |
374ca955 A |
32 | {} |
33 | ||
34 | UBool RuleCharacterIterator::atEnd() const { | |
35 | return buf == 0 && pos.getIndex() == text.length(); | |
36 | } | |
37 | ||
38 | UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { | |
39 | if (U_FAILURE(ec)) return DONE; | |
40 | ||
41 | UChar32 c = DONE; | |
42 | isEscaped = FALSE; | |
43 | ||
44 | for (;;) { | |
45 | c = _current(); | |
4388f060 | 46 | _advance(U16_LENGTH(c)); |
374ca955 A |
47 | |
48 | if (c == SymbolTable::SYMBOL_REF && buf == 0 && | |
49 | (options & PARSE_VARIABLES) != 0 && sym != 0) { | |
50 | UnicodeString name = sym->parseReference(text, pos, text.length()); | |
51 | // If name is empty there was an isolated SYMBOL_REF; | |
52 | // return it. Caller must be prepared for this. | |
53 | if (name.length() == 0) { | |
54 | break; | |
55 | } | |
56 | bufPos = 0; | |
57 | buf = sym->lookup(name); | |
58 | if (buf == 0) { | |
59 | ec = U_UNDEFINED_VARIABLE; | |
60 | return DONE; | |
61 | } | |
62 | // Handle empty variable value | |
63 | if (buf->length() == 0) { | |
64 | buf = 0; | |
65 | } | |
66 | continue; | |
67 | } | |
68 | ||
4388f060 | 69 | if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { |
374ca955 A |
70 | continue; |
71 | } | |
72 | ||
73 | if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { | |
46f4442e | 74 | UnicodeString tempEscape; |
374ca955 | 75 | int32_t offset = 0; |
46f4442e | 76 | c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); |
374ca955 A |
77 | jumpahead(offset); |
78 | isEscaped = TRUE; | |
79 | if (c < 0) { | |
80 | ec = U_MALFORMED_UNICODE_ESCAPE; | |
81 | return DONE; | |
82 | } | |
83 | } | |
84 | ||
85 | break; | |
86 | } | |
87 | ||
88 | return c; | |
89 | } | |
90 | ||
91 | void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { | |
92 | p.buf = buf; | |
93 | p.pos = pos.getIndex(); | |
94 | p.bufPos = bufPos; | |
95 | } | |
96 | ||
97 | void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { | |
98 | buf = p.buf; | |
99 | pos.setIndex(p.pos); | |
100 | bufPos = p.bufPos; | |
101 | } | |
102 | ||
103 | void RuleCharacterIterator::skipIgnored(int32_t options) { | |
104 | if ((options & SKIP_WHITESPACE) != 0) { | |
105 | for (;;) { | |
106 | UChar32 a = _current(); | |
4388f060 A |
107 | if (!PatternProps::isWhiteSpace(a)) break; |
108 | _advance(U16_LENGTH(a)); | |
374ca955 A |
109 | } |
110 | } | |
111 | } | |
112 | ||
46f4442e A |
113 | UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { |
114 | if (maxLookAhead < 0) { | |
115 | maxLookAhead = 0x7FFFFFFF; | |
116 | } | |
374ca955 | 117 | if (buf != 0) { |
46f4442e | 118 | buf->extract(bufPos, maxLookAhead, result); |
374ca955 | 119 | } else { |
46f4442e | 120 | text.extract(pos.getIndex(), maxLookAhead, result); |
374ca955 A |
121 | } |
122 | return result; | |
123 | } | |
124 | ||
125 | void RuleCharacterIterator::jumpahead(int32_t count) { | |
126 | _advance(count); | |
127 | } | |
128 | ||
73c04bcf | 129 | /* |
374ca955 A |
130 | UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { |
131 | int32_t b = pos.getIndex(); | |
132 | text.extract(0, b, result); | |
73c04bcf | 133 | return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index |
374ca955 | 134 | } |
73c04bcf | 135 | */ |
374ca955 A |
136 | |
137 | UChar32 RuleCharacterIterator::_current() const { | |
138 | if (buf != 0) { | |
139 | return buf->char32At(bufPos); | |
140 | } else { | |
141 | int i = pos.getIndex(); | |
142 | return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; | |
143 | } | |
144 | } | |
145 | ||
146 | void RuleCharacterIterator::_advance(int32_t count) { | |
147 | if (buf != 0) { | |
148 | bufPos += count; | |
149 | if (bufPos == buf->length()) { | |
150 | buf = 0; | |
151 | } | |
152 | } else { | |
153 | pos.setIndex(pos.getIndex() + count); | |
154 | if (pos.getIndex() > text.length()) { | |
155 | pos.setIndex(text.length()); | |
156 | } | |
157 | } | |
158 | } | |
159 | ||
160 | U_NAMESPACE_END | |
161 | ||
162 | //eof |