icuSources/test/intltest/tokiter.cpp

   1 /*
   2 **********************************************************************
   3 * Copyright (c) 2004, International Business Machines
   4 * Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 * Author: Alan Liu
   7 * Created: March 22 2004
   8 * Since: ICU 3.0
   9 **********************************************************************
  10 */
  11 #include "tokiter.h"
  12 #include "textfile.h"
  13 #include "util.h"
  14 #include "uprops.h"
  15
  16 TokenIterator::TokenIterator(TextFile* r) {
  17     reader = r;
  18     done = haveLine = FALSE;
  19     pos = lastpos = -1;
  20 }
  21
  22 TokenIterator::~TokenIterator() {
  23 }
  24
  25 UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
  26     if (done || U_FAILURE(ec)) {
  27         return FALSE;
  28     }
  29     token.truncate(0);
  30     for (;;) {
  31         if (!haveLine) {
  32             if (!reader->readLineSkippingComments(line, ec)) {
  33                 done = TRUE;
  34                 return FALSE;
  35             }
  36             haveLine = TRUE;
  37             pos = 0;
  38         }
  39         lastpos = pos;
  40         if (!nextToken(token, ec)) {
  41             haveLine = FALSE;
  42             if (U_FAILURE(ec)) return FALSE;
  43             continue;
  44         }
  45         return TRUE;
  46     }
  47 }
  48
  49 int32_t TokenIterator::getLineNumber() const {
  50     return reader->getLineNumber();
  51 }
  52
  53 /**
  54  * Read the next token from 'this->line' and append it to 'token'.
  55  * Tokens are separated by rule white space.  Tokens may also be
  56  * delimited by double or single quotes.  The closing quote must match
  57  * the opening quote.  If a '#' is encountered, the rest of the line
  58  * is ignored, unless it is backslash-escaped or within quotes.
  59  * @param token the token is appended to this StringBuffer
  60  * @param ec input-output error code
  61  * @return TRUE if a valid token is found, or FALSE if the end
  62  * of the line is reached or an error occurs
  63  */
  64 UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
  65     ICU_Utility::skipWhitespace(line, pos, TRUE);
  66     if (pos == line.length()) {
  67         return FALSE;
  68     }
  69     UChar c = line.charAt(pos++);
  70     UChar quote = 0;
  71     switch (c) {
  72     case 34/*'"'*/:
  73     case 39/*'\\'*/:
  74         quote = c;
  75         break;
  76     case 35/*'#'*/:
  77         return FALSE;
  78     default:
  79         token.append(c);
  80         break;
  81     }
  82     while (pos < line.length()) {
  83         c = line.charAt(pos); // 16-bit ok
  84         if (c == 92/*'\\'*/) {
  85             UChar32 c32 = line.unescapeAt(pos);
  86             if (c32 < 0) {
  87                 ec = U_MALFORMED_UNICODE_ESCAPE;
  88                 return FALSE;
  89             }
  90             token.append(c32);
  91         } else if ((quote != 0 && c == quote) ||
  92                    (quote == 0 && uprv_isRuleWhiteSpace(c))) {
  93             ++pos;
  94             return TRUE;
  95         } else if (quote == 0 && c == '#') {
  96             return TRUE; // do NOT increment
  97         } else {
  98             token.append(c);
  99             ++pos;
 100         }
 101     }
 102     if (quote != 0) {
 103         ec = U_UNTERMINATED_QUOTE;
 104         return FALSE;
 105     }
 106     return TRUE;
 107 }