icuSources/common/ruleiter.cpp

   1 /*
   2 **********************************************************************
   3 * Copyright (c) 2003-2005, International Business Machines
   4 * Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 * Author: Alan Liu
   7 * Created: September 24 2003
   8 * Since: ICU 2.8
   9 **********************************************************************
  10 */
  11 #include "ruleiter.h"
  12 #include "unicode/parsepos.h"
  13 #include "unicode/unistr.h"
  14 #include "unicode/symtable.h"
  15 #include "util.h"
  16
  17 U_NAMESPACE_BEGIN
  18
  19 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
  20                       ParsePosition& thePos) :
  21     text(theText),
  22     pos(thePos),
  23     sym(theSym),
  24     buf(0)
  25 {}
  26
  27 UBool RuleCharacterIterator::atEnd() const {
  28     return buf == 0 && pos.getIndex() == text.length();
  29 }
  30
  31 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
  32     if (U_FAILURE(ec)) return DONE;
  33
  34     UChar32 c = DONE;
  35     isEscaped = FALSE;
  36
  37     for (;;) {
  38         c = _current();
  39         _advance(UTF_CHAR_LENGTH(c));
  40
  41         if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
  42             (options & PARSE_VARIABLES) != 0 && sym != 0) {
  43             UnicodeString name = sym->parseReference(text, pos, text.length());
  44             // If name is empty there was an isolated SYMBOL_REF;
  45             // return it.  Caller must be prepared for this.
  46             if (name.length() == 0) {
  47                 break;
  48             }
  49             bufPos = 0;
  50             buf = sym->lookup(name);
  51             if (buf == 0) {
  52                 ec = U_UNDEFINED_VARIABLE;
  53                 return DONE;
  54             }
  55             // Handle empty variable value
  56             if (buf->length() == 0) {
  57                 buf = 0;
  58             }
  59             continue;
  60         }
  61
  62         if ((options & SKIP_WHITESPACE) != 0 &&
  63             uprv_isRuleWhiteSpace(c)) {
  64             continue;
  65         }
  66
  67         if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
  68             UnicodeString s;
  69             int32_t offset = 0;
  70             c = lookahead(s).unescapeAt(offset);
  71             jumpahead(offset);
  72             isEscaped = TRUE;
  73             if (c < 0) {
  74                 ec = U_MALFORMED_UNICODE_ESCAPE;
  75                 return DONE;
  76             }
  77         }
  78
  79         break;
  80     }
  81
  82     return c;
  83 }
  84
  85 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
  86     p.buf = buf;
  87     p.pos = pos.getIndex();
  88     p.bufPos = bufPos;
  89 }
  90
  91 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
  92     buf = p.buf;
  93     pos.setIndex(p.pos);
  94     bufPos = p.bufPos;
  95 }
  96
  97 void RuleCharacterIterator::skipIgnored(int32_t options) {
  98     if ((options & SKIP_WHITESPACE) != 0) {
  99         for (;;) {
 100             UChar32 a = _current();
 101             if (!uprv_isRuleWhiteSpace(a)) break;
 102             _advance(UTF_CHAR_LENGTH(a));
 103         }
 104     }
 105 }
 106
 107 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result) const {
 108     if (buf != 0) {
 109         buf->extract(bufPos, 0x7FFFFFFF, result);
 110     } else {
 111         text.extract(pos.getIndex(), 0x7FFFFFFF, result);
 112     }
 113     return result;
 114 }
 115
 116 void RuleCharacterIterator::jumpahead(int32_t count) {
 117     _advance(count);
 118 }
 119
 120 /*
 121 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
 122     int32_t b = pos.getIndex();
 123     text.extract(0, b, result);
 124     return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
 125 }
 126 */
 127
 128 UChar32 RuleCharacterIterator::_current() const {
 129     if (buf != 0) {
 130         return buf->char32At(bufPos);
 131     } else {
 132         int i = pos.getIndex();
 133         return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
 134     }
 135 }
 136
 137 void RuleCharacterIterator::_advance(int32_t count) {
 138     if (buf != 0) {
 139         bufPos += count;
 140         if (bufPos == buf->length()) {
 141             buf = 0;
 142         }
 143     } else {
 144         pos.setIndex(pos.getIndex() + count);
 145         if (pos.getIndex() > text.length()) {
 146             pos.setIndex(text.length());
 147         }
 148     }
 149 }
 150
 151 U_NAMESPACE_END
 152
 153 //eof