]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/tokiter.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / intltest / tokiter.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
374ca955
A
3/*
4**********************************************************************
4388f060 5* Copyright (c) 2004-2011, International Business Machines
374ca955
A
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8* Author: Alan Liu
9* Created: March 22 2004
10* Since: ICU 3.0
11**********************************************************************
12*/
13#include "tokiter.h"
14#include "textfile.h"
4388f060 15#include "patternprops.h"
374ca955
A
16#include "util.h"
17#include "uprops.h"
18
19TokenIterator::TokenIterator(TextFile* r) {
20 reader = r;
21 done = haveLine = FALSE;
22 pos = lastpos = -1;
23}
24
25TokenIterator::~TokenIterator() {
26}
27
28UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) {
29 if (done || U_FAILURE(ec)) {
30 return FALSE;
31 }
32 token.truncate(0);
33 for (;;) {
34 if (!haveLine) {
35 if (!reader->readLineSkippingComments(line, ec)) {
36 done = TRUE;
37 return FALSE;
38 }
39 haveLine = TRUE;
40 pos = 0;
41 }
42 lastpos = pos;
43 if (!nextToken(token, ec)) {
44 haveLine = FALSE;
45 if (U_FAILURE(ec)) return FALSE;
46 continue;
47 }
48 return TRUE;
49 }
50}
51
52int32_t TokenIterator::getLineNumber() const {
53 return reader->getLineNumber();
54}
55
56/**
57 * Read the next token from 'this->line' and append it to 'token'.
4388f060 58 * Tokens are separated by Pattern_White_Space. Tokens may also be
374ca955
A
59 * delimited by double or single quotes. The closing quote must match
60 * the opening quote. If a '#' is encountered, the rest of the line
61 * is ignored, unless it is backslash-escaped or within quotes.
62 * @param token the token is appended to this StringBuffer
63 * @param ec input-output error code
64 * @return TRUE if a valid token is found, or FALSE if the end
65 * of the line is reached or an error occurs
66 */
67UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) {
68 ICU_Utility::skipWhitespace(line, pos, TRUE);
69 if (pos == line.length()) {
70 return FALSE;
71 }
72 UChar c = line.charAt(pos++);
73 UChar quote = 0;
74 switch (c) {
75 case 34/*'"'*/:
76 case 39/*'\\'*/:
77 quote = c;
78 break;
79 case 35/*'#'*/:
80 return FALSE;
81 default:
82 token.append(c);
83 break;
84 }
85 while (pos < line.length()) {
86 c = line.charAt(pos); // 16-bit ok
87 if (c == 92/*'\\'*/) {
88 UChar32 c32 = line.unescapeAt(pos);
89 if (c32 < 0) {
90 ec = U_MALFORMED_UNICODE_ESCAPE;
91 return FALSE;
92 }
93 token.append(c32);
94 } else if ((quote != 0 && c == quote) ||
4388f060 95 (quote == 0 && PatternProps::isWhiteSpace(c))) {
374ca955
A
96 ++pos;
97 return TRUE;
98 } else if (quote == 0 && c == '#') {
99 return TRUE; // do NOT increment
100 } else {
101 token.append(c);
102 ++pos;
103 }
104 }
105 if (quote != 0) {
106 ec = U_UNTERMINATED_QUOTE;
107 return FALSE;
108 }
109 return TRUE;
110}