]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/tokiter.cpp
ICU-551.24.tar.gz
[apple/icu.git] / icuSources / test / intltest / tokiter.cpp
CommitLineData
374ca955
A
1/*
2**********************************************************************
4388f060 3* Copyright (c) 2004-2011, International Business Machines
374ca955
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* Author: Alan Liu
7* Created: March 22 2004
8* Since: ICU 3.0
9**********************************************************************
10*/
11#include "tokiter.h"
12#include "textfile.h"
4388f060 13#include "patternprops.h"
374ca955
A
14#include "util.h"
15#include "uprops.h"
16
17TokenIterator::TokenIterator(TextFile* r) {
18 reader = r;
19 done = haveLine = FALSE;
20 pos = lastpos = -1;
21}
22
23TokenIterator::~TokenIterator() {
24}
25
26UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
27 if (done || U_FAILURE(ec)) {
28 return FALSE;
29 }
30 token.truncate(0);
31 for (;;) {
32 if (!haveLine) {
33 if (!reader->readLineSkippingComments(line, ec)) {
34 done = TRUE;
35 return FALSE;
36 }
37 haveLine = TRUE;
38 pos = 0;
39 }
40 lastpos = pos;
41 if (!nextToken(token, ec)) {
42 haveLine = FALSE;
43 if (U_FAILURE(ec)) return FALSE;
44 continue;
45 }
46 return TRUE;
47 }
48}
49
50int32_t TokenIterator::getLineNumber() const {
51 return reader->getLineNumber();
52}
53
54/**
55 * Read the next token from 'this->line' and append it to 'token'.
4388f060 56 * Tokens are separated by Pattern_White_Space. Tokens may also be
374ca955
A
57 * delimited by double or single quotes. The closing quote must match
58 * the opening quote. If a '#' is encountered, the rest of the line
59 * is ignored, unless it is backslash-escaped or within quotes.
60 * @param token the token is appended to this StringBuffer
61 * @param ec input-output error code
62 * @return TRUE if a valid token is found, or FALSE if the end
63 * of the line is reached or an error occurs
64 */
65UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
66 ICU_Utility::skipWhitespace(line, pos, TRUE);
67 if (pos == line.length()) {
68 return FALSE;
69 }
70 UChar c = line.charAt(pos++);
71 UChar quote = 0;
72 switch (c) {
73 case 34/*'"'*/:
74 case 39/*'\\'*/:
75 quote = c;
76 break;
77 case 35/*'#'*/:
78 return FALSE;
79 default:
80 token.append(c);
81 break;
82 }
83 while (pos < line.length()) {
84 c = line.charAt(pos); // 16-bit ok
85 if (c == 92/*'\\'*/) {
86 UChar32 c32 = line.unescapeAt(pos);
87 if (c32 < 0) {
88 ec = U_MALFORMED_UNICODE_ESCAPE;
89 return FALSE;
90 }
91 token.append(c32);
92 } else if ((quote != 0 && c == quote) ||
4388f060 93 (quote == 0 && PatternProps::isWhiteSpace(c))) {
374ca955
A
94 ++pos;
95 return TRUE;
96 } else if (quote == 0 && c == '#') {
97 return TRUE; // do NOT increment
98 } else {
99 token.append(c);
100 ++pos;
101 }
102 }
103 if (quote != 0) {
104 ec = U_UNTERMINATED_QUOTE;
105 return FALSE;
106 }
107 return TRUE;
108}