]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ruleiter.cpp
ICU-531.30.tar.gz
[apple/icu.git] / icuSources / common / ruleiter.cpp
CommitLineData
374ca955
A
/*
**********************************************************************
* Copyright (c) 2003-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: September 24 2003
* Since: ICU 2.8
**********************************************************************
*/
11#include "ruleiter.h"
12#include "unicode/parsepos.h"
374ca955 13#include "unicode/symtable.h"
4388f060
A
14#include "unicode/unistr.h"
15#include "unicode/utf16.h"
16#include "patternprops.h"
374ca955 17
46f4442e
A
18/* \U87654321 or \ud800\udc00 */
19#define MAX_U_NOTATION_LEN 12
20
374ca955
A
21U_NAMESPACE_BEGIN
22
23RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
24 ParsePosition& thePos) :
25 text(theText),
26 pos(thePos),
27 sym(theSym),
46f4442e
A
28 buf(0),
29 bufPos(0)
374ca955
A
30{}
31
32UBool RuleCharacterIterator::atEnd() const {
33 return buf == 0 && pos.getIndex() == text.length();
34}
35
36UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
37 if (U_FAILURE(ec)) return DONE;
38
39 UChar32 c = DONE;
40 isEscaped = FALSE;
41
42 for (;;) {
43 c = _current();
4388f060 44 _advance(U16_LENGTH(c));
374ca955
A
45
46 if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
47 (options & PARSE_VARIABLES) != 0 && sym != 0) {
48 UnicodeString name = sym->parseReference(text, pos, text.length());
49 // If name is empty there was an isolated SYMBOL_REF;
50 // return it. Caller must be prepared for this.
51 if (name.length() == 0) {
52 break;
53 }
54 bufPos = 0;
55 buf = sym->lookup(name);
56 if (buf == 0) {
57 ec = U_UNDEFINED_VARIABLE;
58 return DONE;
59 }
60 // Handle empty variable value
61 if (buf->length() == 0) {
62 buf = 0;
63 }
64 continue;
65 }
66
4388f060 67 if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
374ca955
A
68 continue;
69 }
70
71 if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
46f4442e 72 UnicodeString tempEscape;
374ca955 73 int32_t offset = 0;
46f4442e 74 c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
374ca955
A
75 jumpahead(offset);
76 isEscaped = TRUE;
77 if (c < 0) {
78 ec = U_MALFORMED_UNICODE_ESCAPE;
79 return DONE;
80 }
81 }
82
83 break;
84 }
85
86 return c;
87}
88
89void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
90 p.buf = buf;
91 p.pos = pos.getIndex();
92 p.bufPos = bufPos;
93}
94
95void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
96 buf = p.buf;
97 pos.setIndex(p.pos);
98 bufPos = p.bufPos;
99}
100
101void RuleCharacterIterator::skipIgnored(int32_t options) {
102 if ((options & SKIP_WHITESPACE) != 0) {
103 for (;;) {
104 UChar32 a = _current();
4388f060
A
105 if (!PatternProps::isWhiteSpace(a)) break;
106 _advance(U16_LENGTH(a));
374ca955
A
107 }
108 }
109}
110
46f4442e
A
111UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
112 if (maxLookAhead < 0) {
113 maxLookAhead = 0x7FFFFFFF;
114 }
374ca955 115 if (buf != 0) {
46f4442e 116 buf->extract(bufPos, maxLookAhead, result);
374ca955 117 } else {
46f4442e 118 text.extract(pos.getIndex(), maxLookAhead, result);
374ca955
A
119 }
120 return result;
121}
122
123void RuleCharacterIterator::jumpahead(int32_t count) {
124 _advance(count);
125}
126
73c04bcf 127/*
374ca955
A
128UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
129 int32_t b = pos.getIndex();
130 text.extract(0, b, result);
73c04bcf 131 return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
374ca955 132}
73c04bcf 133*/
374ca955
A
134
135UChar32 RuleCharacterIterator::_current() const {
136 if (buf != 0) {
137 return buf->char32At(bufPos);
138 } else {
139 int i = pos.getIndex();
140 return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
141 }
142}
143
144void RuleCharacterIterator::_advance(int32_t count) {
145 if (buf != 0) {
146 bufPos += count;
147 if (bufPos == buf->length()) {
148 buf = 0;
149 }
150 } else {
151 pos.setIndex(pos.getIndex() + count);
152 if (pos.getIndex() > text.length()) {
153 pos.setIndex(text.length());
154 }
155 }
156}
157
158U_NAMESPACE_END
159
160//eof