]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ruleiter.cpp
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / common / ruleiter.cpp
CommitLineData
374ca955
A
1/*
2**********************************************************************
73c04bcf 3* Copyright (c) 2003-2005, International Business Machines
374ca955
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* Author: Alan Liu
7* Created: September 24 2003
8* Since: ICU 2.8
9**********************************************************************
10*/
11#include "ruleiter.h"
12#include "unicode/parsepos.h"
13#include "unicode/unistr.h"
14#include "unicode/symtable.h"
15#include "util.h"
16
17U_NAMESPACE_BEGIN
18
19RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
20 ParsePosition& thePos) :
21 text(theText),
22 pos(thePos),
23 sym(theSym),
24 buf(0)
25{}
26
27UBool RuleCharacterIterator::atEnd() const {
28 return buf == 0 && pos.getIndex() == text.length();
29}
30
31UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
32 if (U_FAILURE(ec)) return DONE;
33
34 UChar32 c = DONE;
35 isEscaped = FALSE;
36
37 for (;;) {
38 c = _current();
39 _advance(UTF_CHAR_LENGTH(c));
40
41 if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
42 (options & PARSE_VARIABLES) != 0 && sym != 0) {
43 UnicodeString name = sym->parseReference(text, pos, text.length());
44 // If name is empty there was an isolated SYMBOL_REF;
45 // return it. Caller must be prepared for this.
46 if (name.length() == 0) {
47 break;
48 }
49 bufPos = 0;
50 buf = sym->lookup(name);
51 if (buf == 0) {
52 ec = U_UNDEFINED_VARIABLE;
53 return DONE;
54 }
55 // Handle empty variable value
56 if (buf->length() == 0) {
57 buf = 0;
58 }
59 continue;
60 }
61
62 if ((options & SKIP_WHITESPACE) != 0 &&
63 uprv_isRuleWhiteSpace(c)) {
64 continue;
65 }
66
67 if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
68 UnicodeString s;
69 int32_t offset = 0;
70 c = lookahead(s).unescapeAt(offset);
71 jumpahead(offset);
72 isEscaped = TRUE;
73 if (c < 0) {
74 ec = U_MALFORMED_UNICODE_ESCAPE;
75 return DONE;
76 }
77 }
78
79 break;
80 }
81
82 return c;
83}
84
85void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
86 p.buf = buf;
87 p.pos = pos.getIndex();
88 p.bufPos = bufPos;
89}
90
91void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
92 buf = p.buf;
93 pos.setIndex(p.pos);
94 bufPos = p.bufPos;
95}
96
97void RuleCharacterIterator::skipIgnored(int32_t options) {
98 if ((options & SKIP_WHITESPACE) != 0) {
99 for (;;) {
100 UChar32 a = _current();
101 if (!uprv_isRuleWhiteSpace(a)) break;
102 _advance(UTF_CHAR_LENGTH(a));
103 }
104 }
105}
106
107UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result) const {
108 if (buf != 0) {
109 buf->extract(bufPos, 0x7FFFFFFF, result);
110 } else {
111 text.extract(pos.getIndex(), 0x7FFFFFFF, result);
112 }
113 return result;
114}
115
116void RuleCharacterIterator::jumpahead(int32_t count) {
117 _advance(count);
118}
119
73c04bcf 120/*
374ca955
A
121UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
122 int32_t b = pos.getIndex();
123 text.extract(0, b, result);
73c04bcf 124 return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
374ca955 125}
73c04bcf 126*/
374ca955
A
127
128UChar32 RuleCharacterIterator::_current() const {
129 if (buf != 0) {
130 return buf->char32At(bufPos);
131 } else {
132 int i = pos.getIndex();
133 return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
134 }
135}
136
137void RuleCharacterIterator::_advance(int32_t count) {
138 if (buf != 0) {
139 bufPos += count;
140 if (bufPos == buf->length()) {
141 buf = 0;
142 }
143 } else {
144 pos.setIndex(pos.getIndex() + count);
145 if (pos.getIndex() > text.length()) {
146 pos.setIndex(text.length());
147 }
148 }
149}
150
151U_NAMESPACE_END
152
153//eof