icuSources/test/intltest/tokiter.cpp

   1 /*
   2 **********************************************************************
   3 * Copyright (c) 2004-2011, International Business Machines
   4 * Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 * Author: Alan Liu
   7 * Created: March 22 2004
   8 * Since: ICU 3.0
   9 **********************************************************************
  10 */
  11 #include "tokiter.h"
  12 #include "textfile.h"
  13 #include "patternprops.h"
  14 #include "util.h"
  15 #include "uprops.h"
  16
  17 TokenIterator::TokenIterator(TextFile* r) {
  18     reader = r;
  19     done = haveLine = FALSE;
  20     pos = lastpos = -1;
  21 }
  22
  23 TokenIterator::~TokenIterator() {
  24 }
  25
  26 UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
  27     if (done || U_FAILURE(ec)) {
  28         return FALSE;
  29     }
  30     token.truncate(0);
  31     for (;;) {
  32         if (!haveLine) {
  33             if (!reader->readLineSkippingComments(line, ec)) {
  34                 done = TRUE;
  35                 return FALSE;
  36             }
  37             haveLine = TRUE;
  38             pos = 0;
  39         }
  40         lastpos = pos;
  41         if (!nextToken(token, ec)) {
  42             haveLine = FALSE;
  43             if (U_FAILURE(ec)) return FALSE;
  44             continue;
  45         }
  46         return TRUE;
  47     }
  48 }
  49
  50 int32_t TokenIterator::getLineNumber() const {
  51     return reader->getLineNumber();
  52 }
  53
  54 /**
  55  * Read the next token from 'this->line' and append it to 'token'.
  56  * Tokens are separated by Pattern_White_Space.  Tokens may also be
  57  * delimited by double or single quotes.  The closing quote must match
  58  * the opening quote.  If a '#' is encountered, the rest of the line
  59  * is ignored, unless it is backslash-escaped or within quotes.
  60  * @param token the token is appended to this StringBuffer
  61  * @param ec input-output error code
  62  * @return TRUE if a valid token is found, or FALSE if the end
  63  * of the line is reached or an error occurs
  64  */
  65 UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
  66     ICU_Utility::skipWhitespace(line, pos, TRUE);
  67     if (pos == line.length()) {
  68         return FALSE;
  69     }
  70     UChar c = line.charAt(pos++);
  71     UChar quote = 0;
  72     switch (c) {
  73     case 34/*'"'*/:
  74     case 39/*'\\'*/:
  75         quote = c;
  76         break;
  77     case 35/*'#'*/:
  78         return FALSE;
  79     default:
  80         token.append(c);
  81         break;
  82     }
  83     while (pos < line.length()) {
  84         c = line.charAt(pos); // 16-bit ok
  85         if (c == 92/*'\\'*/) {
  86             UChar32 c32 = line.unescapeAt(pos);
  87             if (c32 < 0) {
  88                 ec = U_MALFORMED_UNICODE_ESCAPE;
  89                 return FALSE;
  90             }
  91             token.append(c32);
  92         } else if ((quote != 0 && c == quote) ||
  93                    (quote == 0 && PatternProps::isWhiteSpace(c))) {
  94             ++pos;
  95             return TRUE;
  96         } else if (quote == 0 && c == '#') {
  97             return TRUE; // do NOT increment
  98         } else {
  99             token.append(c);
 100             ++pos;
 101         }
 102     }
 103     if (quote != 0) {
 104         ec = U_UNTERMINATED_QUOTE;
 105         return FALSE;
 106     }
 107     return TRUE;
 108 }