]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ruleiter.cpp
ICU-59180.0.1.tar.gz
[apple/icu.git] / icuSources / common / ruleiter.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
374ca955
A
3/*
4**********************************************************************
4388f060 5* Copyright (c) 2003-2011, International Business Machines
374ca955
A
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8* Author: Alan Liu
9* Created: September 24 2003
10* Since: ICU 2.8
11**********************************************************************
12*/
13#include "ruleiter.h"
14#include "unicode/parsepos.h"
374ca955 15#include "unicode/symtable.h"
4388f060
A
16#include "unicode/unistr.h"
17#include "unicode/utf16.h"
18#include "patternprops.h"
374ca955 19
46f4442e
A
20/* \U87654321 or \ud800\udc00 */
21#define MAX_U_NOTATION_LEN 12
22
374ca955
A
23U_NAMESPACE_BEGIN
24
25RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
26 ParsePosition& thePos) :
27 text(theText),
28 pos(thePos),
29 sym(theSym),
46f4442e
A
30 buf(0),
31 bufPos(0)
374ca955
A
32{}
33
34UBool RuleCharacterIterator::atEnd() const {
35 return buf == 0 && pos.getIndex() == text.length();
36}
37
38UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
39 if (U_FAILURE(ec)) return DONE;
40
41 UChar32 c = DONE;
42 isEscaped = FALSE;
43
44 for (;;) {
45 c = _current();
4388f060 46 _advance(U16_LENGTH(c));
374ca955
A
47
48 if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
49 (options & PARSE_VARIABLES) != 0 && sym != 0) {
50 UnicodeString name = sym->parseReference(text, pos, text.length());
51 // If name is empty there was an isolated SYMBOL_REF;
52 // return it. Caller must be prepared for this.
53 if (name.length() == 0) {
54 break;
55 }
56 bufPos = 0;
57 buf = sym->lookup(name);
58 if (buf == 0) {
59 ec = U_UNDEFINED_VARIABLE;
60 return DONE;
61 }
62 // Handle empty variable value
63 if (buf->length() == 0) {
64 buf = 0;
65 }
66 continue;
67 }
68
4388f060 69 if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
374ca955
A
70 continue;
71 }
72
73 if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
46f4442e 74 UnicodeString tempEscape;
374ca955 75 int32_t offset = 0;
46f4442e 76 c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
374ca955
A
77 jumpahead(offset);
78 isEscaped = TRUE;
79 if (c < 0) {
80 ec = U_MALFORMED_UNICODE_ESCAPE;
81 return DONE;
82 }
83 }
84
85 break;
86 }
87
88 return c;
89}
90
91void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
92 p.buf = buf;
93 p.pos = pos.getIndex();
94 p.bufPos = bufPos;
95}
96
97void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
98 buf = p.buf;
99 pos.setIndex(p.pos);
100 bufPos = p.bufPos;
101}
102
103void RuleCharacterIterator::skipIgnored(int32_t options) {
104 if ((options & SKIP_WHITESPACE) != 0) {
105 for (;;) {
106 UChar32 a = _current();
4388f060
A
107 if (!PatternProps::isWhiteSpace(a)) break;
108 _advance(U16_LENGTH(a));
374ca955
A
109 }
110 }
111}
112
46f4442e
A
113UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
114 if (maxLookAhead < 0) {
115 maxLookAhead = 0x7FFFFFFF;
116 }
374ca955 117 if (buf != 0) {
46f4442e 118 buf->extract(bufPos, maxLookAhead, result);
374ca955 119 } else {
46f4442e 120 text.extract(pos.getIndex(), maxLookAhead, result);
374ca955
A
121 }
122 return result;
123}
124
125void RuleCharacterIterator::jumpahead(int32_t count) {
126 _advance(count);
127}
128
73c04bcf 129/*
374ca955
A
130UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
131 int32_t b = pos.getIndex();
132 text.extract(0, b, result);
73c04bcf 133 return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
374ca955 134}
73c04bcf 135*/
374ca955
A
136
137UChar32 RuleCharacterIterator::_current() const {
138 if (buf != 0) {
139 return buf->char32At(bufPos);
140 } else {
141 int i = pos.getIndex();
142 return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
143 }
144}
145
146void RuleCharacterIterator::_advance(int32_t count) {
147 if (buf != 0) {
148 bufPos += count;
149 if (bufPos == buf->length()) {
150 buf = 0;
151 }
152 } else {
153 pos.setIndex(pos.getIndex() + count);
154 if (pos.getIndex() > text.length()) {
155 pos.setIndex(text.length());
156 }
157 }
158}
159
160U_NAMESPACE_END
161
162//eof