]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/tokiter.cpp
ICU-461.18.tar.gz
[apple/icu.git] / icuSources / test / intltest / tokiter.cpp
CommitLineData
374ca955
A
1/*
2**********************************************************************
3* Copyright (c) 2004, International Business Machines
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* Author: Alan Liu
7* Created: March 22 2004
8* Since: ICU 3.0
9**********************************************************************
10*/
11#include "tokiter.h"
12#include "textfile.h"
13#include "util.h"
14#include "uprops.h"
15
16TokenIterator::TokenIterator(TextFile* r) {
17 reader = r;
18 done = haveLine = FALSE;
19 pos = lastpos = -1;
20}
21
22TokenIterator::~TokenIterator() {
23}
24
25UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
26 if (done || U_FAILURE(ec)) {
27 return FALSE;
28 }
29 token.truncate(0);
30 for (;;) {
31 if (!haveLine) {
32 if (!reader->readLineSkippingComments(line, ec)) {
33 done = TRUE;
34 return FALSE;
35 }
36 haveLine = TRUE;
37 pos = 0;
38 }
39 lastpos = pos;
40 if (!nextToken(token, ec)) {
41 haveLine = FALSE;
42 if (U_FAILURE(ec)) return FALSE;
43 continue;
44 }
45 return TRUE;
46 }
47}
48
49int32_t TokenIterator::getLineNumber() const {
50 return reader->getLineNumber();
51}
52
53/**
54 * Read the next token from 'this->line' and append it to 'token'.
55 * Tokens are separated by rule white space. Tokens may also be
56 * delimited by double or single quotes. The closing quote must match
57 * the opening quote. If a '#' is encountered, the rest of the line
58 * is ignored, unless it is backslash-escaped or within quotes.
59 * @param token the token is appended to this StringBuffer
60 * @param ec input-output error code
61 * @return TRUE if a valid token is found, or FALSE if the end
62 * of the line is reached or an error occurs
63 */
64UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
65 ICU_Utility::skipWhitespace(line, pos, TRUE);
66 if (pos == line.length()) {
67 return FALSE;
68 }
69 UChar c = line.charAt(pos++);
70 UChar quote = 0;
71 switch (c) {
72 case 34/*'"'*/:
73 case 39/*'\\'*/:
74 quote = c;
75 break;
76 case 35/*'#'*/:
77 return FALSE;
78 default:
79 token.append(c);
80 break;
81 }
82 while (pos < line.length()) {
83 c = line.charAt(pos); // 16-bit ok
84 if (c == 92/*'\\'*/) {
85 UChar32 c32 = line.unescapeAt(pos);
86 if (c32 < 0) {
87 ec = U_MALFORMED_UNICODE_ESCAPE;
88 return FALSE;
89 }
90 token.append(c32);
91 } else if ((quote != 0 && c == quote) ||
92 (quote == 0 && uprv_isRuleWhiteSpace(c))) {
93 ++pos;
94 return TRUE;
95 } else if (quote == 0 && c == '#') {
96 return TRUE; // do NOT increment
97 } else {
98 token.append(c);
99 ++pos;
100 }
101 }
102 if (quote != 0) {
103 ec = U_UNTERMINATED_QUOTE;
104 return FALSE;
105 }
106 return TRUE;
107}