]>
Commit | Line | Data |
---|---|---|
374ca955 A |
1 | /* |
2 | ********************************************************************** | |
4388f060 | 3 | * Copyright (c) 2004-2011, International Business Machines |
374ca955 A |
4 | * Corporation and others. All Rights Reserved. |
5 | ********************************************************************** | |
6 | * Author: Alan Liu | |
7 | * Created: March 22 2004 | |
8 | * Since: ICU 3.0 | |
9 | ********************************************************************** | |
10 | */ | |
11 | #include "tokiter.h" | |
12 | #include "textfile.h" | |
4388f060 | 13 | #include "patternprops.h" |
374ca955 A |
14 | #include "util.h" |
15 | #include "uprops.h" | |
16 | ||
17 | TokenIterator::TokenIterator(TextFile* r) { | |
18 | reader = r; | |
19 | done = haveLine = FALSE; | |
20 | pos = lastpos = -1; | |
21 | } | |
22 | ||
23 | TokenIterator::~TokenIterator() { | |
24 | } | |
25 | ||
26 | UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) { | |
27 | if (done || U_FAILURE(ec)) { | |
28 | return FALSE; | |
29 | } | |
30 | token.truncate(0); | |
31 | for (;;) { | |
32 | if (!haveLine) { | |
33 | if (!reader->readLineSkippingComments(line, ec)) { | |
34 | done = TRUE; | |
35 | return FALSE; | |
36 | } | |
37 | haveLine = TRUE; | |
38 | pos = 0; | |
39 | } | |
40 | lastpos = pos; | |
41 | if (!nextToken(token, ec)) { | |
42 | haveLine = FALSE; | |
43 | if (U_FAILURE(ec)) return FALSE; | |
44 | continue; | |
45 | } | |
46 | return TRUE; | |
47 | } | |
48 | } | |
49 | ||
50 | int32_t TokenIterator::getLineNumber() const { | |
51 | return reader->getLineNumber(); | |
52 | } | |
53 | ||
54 | /** | |
55 | * Read the next token from 'this->line' and append it to 'token'. | |
4388f060 | 56 | * Tokens are separated by Pattern_White_Space. Tokens may also be |
374ca955 A |
57 | * delimited by double or single quotes. The closing quote must match |
58 | * the opening quote. If a '#' is encountered, the rest of the line | |
59 | * is ignored, unless it is backslash-escaped or within quotes. | |
60 | * @param token the token is appended to this StringBuffer | |
61 | * @param ec input-output error code | |
62 | * @return TRUE if a valid token is found, or FALSE if the end | |
63 | * of the line is reached or an error occurs | |
64 | */ | |
65 | UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) { | |
66 | ICU_Utility::skipWhitespace(line, pos, TRUE); | |
67 | if (pos == line.length()) { | |
68 | return FALSE; | |
69 | } | |
70 | UChar c = line.charAt(pos++); | |
71 | UChar quote = 0; | |
72 | switch (c) { | |
73 | case 34/*'"'*/: | |
74 | case 39/*'\\'*/: | |
75 | quote = c; | |
76 | break; | |
77 | case 35/*'#'*/: | |
78 | return FALSE; | |
79 | default: | |
80 | token.append(c); | |
81 | break; | |
82 | } | |
83 | while (pos < line.length()) { | |
84 | c = line.charAt(pos); // 16-bit ok | |
85 | if (c == 92/*'\\'*/) { | |
86 | UChar32 c32 = line.unescapeAt(pos); | |
87 | if (c32 < 0) { | |
88 | ec = U_MALFORMED_UNICODE_ESCAPE; | |
89 | return FALSE; | |
90 | } | |
91 | token.append(c32); | |
92 | } else if ((quote != 0 && c == quote) || | |
4388f060 | 93 | (quote == 0 && PatternProps::isWhiteSpace(c))) { |
374ca955 A |
94 | ++pos; |
95 | return TRUE; | |
96 | } else if (quote == 0 && c == '#') { | |
97 | return TRUE; // do NOT increment | |
98 | } else { | |
99 | token.append(c); | |
100 | ++pos; | |
101 | } | |
102 | } | |
103 | if (quote != 0) { | |
104 | ec = U_UNTERMINATED_QUOTE; | |
105 | return FALSE; | |
106 | } | |
107 | return TRUE; | |
108 | } |