]>
Commit | Line | Data |
---|---|---|
374ca955 A |
1 | /* |
2 | ********************************************************************** | |
3 | * Copyright (c) 2004, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ********************************************************************** | |
6 | * Author: Alan Liu | |
7 | * Created: March 22 2004 | |
8 | * Since: ICU 3.0 | |
9 | ********************************************************************** | |
10 | */ | |
11 | #include "tokiter.h" | |
12 | #include "textfile.h" | |
13 | #include "util.h" | |
14 | #include "uprops.h" | |
15 | ||
16 | TokenIterator::TokenIterator(TextFile* r) { | |
17 | reader = r; | |
18 | done = haveLine = FALSE; | |
19 | pos = lastpos = -1; | |
20 | } | |
21 | ||
22 | TokenIterator::~TokenIterator() { | |
23 | } | |
24 | ||
25 | UBool TokenIterator::next(UnicodeString& token, UErrorCode& ec) { | |
26 | if (done || U_FAILURE(ec)) { | |
27 | return FALSE; | |
28 | } | |
29 | token.truncate(0); | |
30 | for (;;) { | |
31 | if (!haveLine) { | |
32 | if (!reader->readLineSkippingComments(line, ec)) { | |
33 | done = TRUE; | |
34 | return FALSE; | |
35 | } | |
36 | haveLine = TRUE; | |
37 | pos = 0; | |
38 | } | |
39 | lastpos = pos; | |
40 | if (!nextToken(token, ec)) { | |
41 | haveLine = FALSE; | |
42 | if (U_FAILURE(ec)) return FALSE; | |
43 | continue; | |
44 | } | |
45 | return TRUE; | |
46 | } | |
47 | } | |
48 | ||
49 | int32_t TokenIterator::getLineNumber() const { | |
50 | return reader->getLineNumber(); | |
51 | } | |
52 | ||
53 | /** | |
54 | * Read the next token from 'this->line' and append it to 'token'. | |
55 | * Tokens are separated by rule white space. Tokens may also be | |
56 | * delimited by double or single quotes. The closing quote must match | |
57 | * the opening quote. If a '#' is encountered, the rest of the line | |
58 | * is ignored, unless it is backslash-escaped or within quotes. | |
59 | * @param token the token is appended to this StringBuffer | |
60 | * @param ec input-output error code | |
61 | * @return TRUE if a valid token is found, or FALSE if the end | |
62 | * of the line is reached or an error occurs | |
63 | */ | |
64 | UBool TokenIterator::nextToken(UnicodeString& token, UErrorCode& ec) { | |
65 | ICU_Utility::skipWhitespace(line, pos, TRUE); | |
66 | if (pos == line.length()) { | |
67 | return FALSE; | |
68 | } | |
69 | UChar c = line.charAt(pos++); | |
70 | UChar quote = 0; | |
71 | switch (c) { | |
72 | case 34/*'"'*/: | |
73 | case 39/*'\\'*/: | |
74 | quote = c; | |
75 | break; | |
76 | case 35/*'#'*/: | |
77 | return FALSE; | |
78 | default: | |
79 | token.append(c); | |
80 | break; | |
81 | } | |
82 | while (pos < line.length()) { | |
83 | c = line.charAt(pos); // 16-bit ok | |
84 | if (c == 92/*'\\'*/) { | |
85 | UChar32 c32 = line.unescapeAt(pos); | |
86 | if (c32 < 0) { | |
87 | ec = U_MALFORMED_UNICODE_ESCAPE; | |
88 | return FALSE; | |
89 | } | |
90 | token.append(c32); | |
91 | } else if ((quote != 0 && c == quote) || | |
92 | (quote == 0 && uprv_isRuleWhiteSpace(c))) { | |
93 | ++pos; | |
94 | return TRUE; | |
95 | } else if (quote == 0 && c == '#') { | |
96 | return TRUE; // do NOT increment | |
97 | } else { | |
98 | token.append(c); | |
99 | ++pos; | |
100 | } | |
101 | } | |
102 | if (quote != 0) { | |
103 | ec = U_UNTERMINATED_QUOTE; | |
104 | return FALSE; | |
105 | } | |
106 | return TRUE; | |
107 | } |