2 ********************************************************************** 
   3 * Copyright (c) 2003-2011, International Business Machines 
   4 * Corporation and others.  All Rights Reserved. 
   5 ********************************************************************** 
   7 * Created: September 24 2003 
   9 ********************************************************************** 
  12 #include "unicode/parsepos.h" 
  13 #include "unicode/symtable.h" 
  14 #include "unicode/unistr.h" 
  15 #include "unicode/utf16.h" 
  16 #include "patternprops.h" 
  18 /* Upper bound, in UTF-16 code units, on the backslash-u notations handed to
        unescapeAt() by next(): \U87654321 is 10 units long, and a surrogate
        pair written as \ud800\udc00 is 12 units long. */ 
  19 #define MAX_U_NOTATION_LEN 12 
  // Constructs an iterator from the text to scan (theText), a symbol table
  // (theSym) and the caller's parse position (thePos).
  // NOTE(review): theSym is presumably what next() consults as `sym` for
  // variable references, and presumably may be 0 because next() tests
  // sym != 0 before dereferencing it -- confirm against the initializer list.
  23 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString
& theText
, const SymbolTable
* theSym
, 
  24                       ParsePosition
& thePos
) : 
  32 UBool 
RuleCharacterIterator::atEnd() const { 
  33     return buf 
== 0 && pos
.getIndex() == text
.length(); 
  // Returns the next code point of the rule input, honoring the
  // PARSE_VARIABLES, SKIP_WHITESPACE and PARSE_ESCAPES bits of `options`.
  //
  //   options   - bit mask tested against the three flags named above.
  //   isEscaped - output flag. NOTE(review): presumably reports whether the
  //               returned code point came from a backslash escape -- confirm.
  //   ec        - in/out error code. If it already signals failure, DONE is
  //               returned immediately. It is set to U_UNDEFINED_VARIABLE on
  //               the variable-lookup path and to U_MALFORMED_UNICODE_ESCAPE
  //               on the escape-decoding path.
  36 UChar32 
RuleCharacterIterator::next(int32_t options
, UBool
& isEscaped
, UErrorCode
& ec
) { 
  37     if (U_FAILURE(ec
)) return DONE
; 
  // Step over c; U16_LENGTH(c) is its length in UTF-16 code units.
  44         _advance(U16_LENGTH(c
)); 
  // Variable reference. It is only expanded when no variable-value buffer is
  // already active (buf == 0), the caller asked for PARSE_VARIABLES, and a
  // symbol table is available.
  46         if (c 
== SymbolTable::SYMBOL_REF 
&& buf 
== 0 && 
  47             (options 
& PARSE_VARIABLES
) != 0 && sym 
!= 0) { 
  // Let the symbol table parse the name that follows the reference character,
  // starting at pos and limited to the end of the text.
  48             UnicodeString name 
= sym
->parseReference(text
, pos
, text
.length()); 
  49             // If name is empty there was an isolated SYMBOL_REF; 
  50             // return it.  Caller must be prepared for this. 
  51             if (name
.length() == 0) { 
  // Fetch the value bound to the variable name from the symbol table.
  55             buf 
= sym
->lookup(name
); 
  57                 ec 
= U_UNDEFINED_VARIABLE
; 
  60             // Handle empty variable value 
  61             if (buf
->length() == 0) { 
  // Pattern white space is filtered only when SKIP_WHITESPACE is requested.
  67         if ((options 
& SKIP_WHITESPACE
) != 0 && PatternProps::isWhiteSpace(c
)) { 
  // Backslash escape, decoded only when PARSE_ESCAPES is requested. At most
  // MAX_U_NOTATION_LEN code units of lookahead are handed to unescapeAt().
  71         if (c 
== 0x5C /*'\\'*/ && (options 
& PARSE_ESCAPES
) != 0) { 
  72             UnicodeString tempEscape
; 
  74             c 
= lookahead(tempEscape
, MAX_U_NOTATION_LEN
).unescapeAt(offset
); 
  78                 ec 
= U_MALFORMED_UNICODE_ESCAPE
; 
  // Records the iterator's current position in the caller-supplied Pos
  // object; p.pos receives the current index of the parse position.
  // NOTE(review): the remaining Pos fields are presumably filled from the
  // buffer state (buf / bufPos) -- confirm.
  89 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos
& p
) const { 
  91     p
.pos 
= pos
.getIndex(); 
  // Takes a Pos object by const reference.
  // NOTE(review): presumably restores the iterator to a position previously
  // captured with getPos() -- confirm against the body.
  95 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos
& p
) { 
 // Skips input that the given options say should be ignored. Only acts when
 // SKIP_WHITESPACE is set in `options`: code points for which
 // PatternProps::isWhiteSpace() is true are consumed, stopping at the first
 // code point that is not pattern white space.
 101 void RuleCharacterIterator::skipIgnored(int32_t options
) { 
 102     if ((options 
& SKIP_WHITESPACE
) != 0) { 
 // Peek at the current code point without consuming it.
 104             UChar32 a 
= _current(); 
 105             if (!PatternProps::isWhiteSpace(a
)) break; 
 // Consume it; U16_LENGTH(a) is its length in UTF-16 code units.
 106             _advance(U16_LENGTH(a
)); 
 // Copies upcoming input into `result` without moving the iterator (the
 // method is const).
 //
 //   result       - receives the extracted text.
 //   maxLookAhead - maximum number of code units to extract; a negative value
 //                  is replaced by 0x7FFFFFFF, i.e. effectively no limit.
 //
 // The text is taken either from the variable-value buffer starting at bufPos
 // or from the main text starting at the current parse index.
 // NOTE(review): presumably returns `result`; next() chains unescapeAt() on
 // the returned reference -- confirm.
 111 UnicodeString
& RuleCharacterIterator::lookahead(UnicodeString
& result
, int32_t maxLookAhead
) const { 
 112     if (maxLookAhead 
< 0) { 
 113         maxLookAhead 
= 0x7FFFFFFF; 
 // Source 1: the variable-value buffer, from the current buffer offset.
 116         buf
->extract(bufPos
, maxLookAhead
, result
); 
 // Source 2: the main text, from the current parse index.
 118         text
.extract(pos
.getIndex(), maxLookAhead
, result
); 
 // Takes a count of units to move by.
 // NOTE(review): presumably advances the iterator by `count` UTF-16 code
 // units, like _advance() -- confirm against the body.
 123 void RuleCharacterIterator::jumpahead(int32_t count
) { 
 128 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { 
 129     int32_t b = pos.getIndex(); 
 130     text.extract(0, b, result); 
 131     return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index 
 // Returns the code point at the iterator's current position without
 // consuming it (the method is const). The value comes either from the
 // variable-value buffer at bufPos or from the main text at the current parse
 // index; in the latter case DONE is returned when the index is not below
 // text.length().
 135 UChar32 
RuleCharacterIterator::_current() const { 
 // Reading from the variable-value buffer.
 137         return buf
->char32At(bufPos
); 
 // Reading from the main text, guarded against running off the end.
 139         int i 
= pos
.getIndex(); 
 140         return (i 
< text
.length()) ? text
.char32At(i
) : (UChar32
)DONE
; 
 // Moves the iterator forward by `count` units. On the main-text path the
 // parse index is increased by `count` and then clamped so that it never
 // exceeds text.length().
 // NOTE(review): on the buffer path, reaching the end of the buffer
 // (bufPos == buf->length()) presumably switches reading back to the main
 // text -- confirm.
 144 void RuleCharacterIterator::_advance(int32_t count
) { 
 147         if (bufPos 
== buf
->length()) { 
 // Main-text path: bump the parse index ...
 151         pos
.setIndex(pos
.getIndex() + count
); 
 // ... and clamp it to the end of the text.
 152         if (pos
.getIndex() > text
.length()) { 
 153             pos
.setIndex(text
.length());