]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/simpleformatter.cpp
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / common / simpleformatter.cpp
1 /*
2 ******************************************************************************
3 * Copyright (C) 2014-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
6 * simpleformatter.cpp
7 */
8
9 #include "unicode/utypes.h"
10 #include "unicode/simpleformatter.h"
11 #include "unicode/unistr.h"
12 #include "uassert.h"
13
14 U_NAMESPACE_BEGIN
15
16 namespace {
17
18 /**
19 * Argument numbers must be smaller than this limit.
20 * Text segment lengths are offset by this much.
21 * This is currently the only unused char value in compiled patterns,
22 * except it is the maximum value of the first unit (max arg +1).
23 */
24 const int32_t ARG_NUM_LIMIT = 0x100;
25 /**
26 * Initial and maximum char/UChar value set for a text segment.
27 * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
28 * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
29 */
30 const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff;
31 /**
32 * Maximum length of a text segment. Longer segments are split into shorter ones.
33 */
34 const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT;
35
36 enum {
37 APOS = 0x27,
38 DIGIT_ZERO = 0x30,
39 DIGIT_ONE = 0x31,
40 DIGIT_NINE = 0x39,
41 OPEN_BRACE = 0x7b,
42 CLOSE_BRACE = 0x7d
43 };
44
45 inline UBool isInvalidArray(const void *array, int32_t length) {
46 return (length < 0 || (array == NULL && length != 0));
47 }
48
49 } // namespace
50
51 SimpleFormatter &SimpleFormatter::operator=(const SimpleFormatter& other) {
52 if (this == &other) {
53 return *this;
54 }
55 compiledPattern = other.compiledPattern;
56 return *this;
57 }
58
59 SimpleFormatter::~SimpleFormatter() {}
60
61 UBool SimpleFormatter::applyPatternMinMaxArguments(
62 const UnicodeString &pattern,
63 int32_t min, int32_t max,
64 UErrorCode &errorCode) {
65 if (U_FAILURE(errorCode)) {
66 return FALSE;
67 }
68 // Parse consistent with MessagePattern, but
69 // - support only simple numbered arguments
70 // - build a simple binary structure into the result string
71 const UChar *patternBuffer = pattern.getBuffer();
72 int32_t patternLength = pattern.length();
73 // Reserve the first char for the number of arguments.
74 compiledPattern.setTo((UChar)0);
75 int32_t textLength = 0;
76 int32_t maxArg = -1;
77 UBool inQuote = FALSE;
78 for (int32_t i = 0; i < patternLength;) {
79 UChar c = patternBuffer[i++];
80 if (c == APOS) {
81 if (i < patternLength && (c = patternBuffer[i]) == APOS) {
82 // double apostrophe, skip the second one
83 ++i;
84 } else if (inQuote) {
85 // skip the quote-ending apostrophe
86 inQuote = FALSE;
87 continue;
88 } else if (c == OPEN_BRACE || c == CLOSE_BRACE) {
89 // Skip the quote-starting apostrophe, find the end of the quoted literal text.
90 ++i;
91 inQuote = TRUE;
92 } else {
93 // The apostrophe is part of literal text.
94 c = APOS;
95 }
96 } else if (!inQuote && c == OPEN_BRACE) {
97 if (textLength > 0) {
98 compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
99 (UChar)(ARG_NUM_LIMIT + textLength));
100 textLength = 0;
101 }
102 int32_t argNumber;
103 if ((i + 1) < patternLength &&
104 0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 &&
105 patternBuffer[i + 1] == CLOSE_BRACE) {
106 i += 2;
107 } else {
108 // Multi-digit argument number (no leading zero) or syntax error.
109 // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
110 // around the number, but this class does not.
111 argNumber = -1;
112 if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
113 argNumber = c - DIGIT_ZERO;
114 while (i < patternLength &&
115 DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) {
116 argNumber = argNumber * 10 + (c - DIGIT_ZERO);
117 if (argNumber >= ARG_NUM_LIMIT) {
118 break;
119 }
120 }
121 }
122 if (argNumber < 0 || c != CLOSE_BRACE) {
123 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
124 return FALSE;
125 }
126 }
127 if (argNumber > maxArg) {
128 maxArg = argNumber;
129 }
130 compiledPattern.append((UChar)argNumber);
131 continue;
132 } // else: c is part of literal text
133 // Append c and track the literal-text segment length.
134 if (textLength == 0) {
135 // Reserve a char for the length of a new text segment, preset the maximum length.
136 compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR);
137 }
138 compiledPattern.append(c);
139 if (++textLength == MAX_SEGMENT_LENGTH) {
140 textLength = 0;
141 }
142 }
143 if (textLength > 0) {
144 compiledPattern.setCharAt(compiledPattern.length() - textLength - 1,
145 (UChar)(ARG_NUM_LIMIT + textLength));
146 }
147 int32_t argCount = maxArg + 1;
148 if (argCount < min || max < argCount) {
149 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
150 return FALSE;
151 }
152 compiledPattern.setCharAt(0, (UChar)argCount);
153 return TRUE;
154 }
155
156 UnicodeString& SimpleFormatter::format(
157 const UnicodeString &value0,
158 UnicodeString &appendTo, UErrorCode &errorCode) const {
159 const UnicodeString *values[] = { &value0 };
160 return formatAndAppend(values, 1, appendTo, NULL, 0, errorCode);
161 }
162
163 UnicodeString& SimpleFormatter::format(
164 const UnicodeString &value0,
165 const UnicodeString &value1,
166 UnicodeString &appendTo, UErrorCode &errorCode) const {
167 const UnicodeString *values[] = { &value0, &value1 };
168 return formatAndAppend(values, 2, appendTo, NULL, 0, errorCode);
169 }
170
171 UnicodeString& SimpleFormatter::format(
172 const UnicodeString &value0,
173 const UnicodeString &value1,
174 const UnicodeString &value2,
175 UnicodeString &appendTo, UErrorCode &errorCode) const {
176 const UnicodeString *values[] = { &value0, &value1, &value2 };
177 return formatAndAppend(values, 3, appendTo, NULL, 0, errorCode);
178 }
179
180 UnicodeString& SimpleFormatter::formatAndAppend(
181 const UnicodeString *const *values, int32_t valuesLength,
182 UnicodeString &appendTo,
183 int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
184 if (U_FAILURE(errorCode)) {
185 return appendTo;
186 }
187 if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength) ||
188 valuesLength < getArgumentLimit()) {
189 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
190 return appendTo;
191 }
192 return format(compiledPattern.getBuffer(), compiledPattern.length(), values,
193 appendTo, NULL, TRUE,
194 offsets, offsetsLength, errorCode);
195 }
196
197 UnicodeString &SimpleFormatter::formatAndReplace(
198 const UnicodeString *const *values, int32_t valuesLength,
199 UnicodeString &result,
200 int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const {
201 if (U_FAILURE(errorCode)) {
202 return result;
203 }
204 if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) {
205 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
206 return result;
207 }
208 const UChar *cp = compiledPattern.getBuffer();
209 int32_t cpLength = compiledPattern.length();
210 if (valuesLength < getArgumentLimit(cp, cpLength)) {
211 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
212 return result;
213 }
214
215 // If the pattern starts with an argument whose value is the same object
216 // as the result, then we keep the result contents and append to it.
217 // Otherwise we replace its contents.
218 int32_t firstArg = -1;
219 // If any non-initial argument value is the same object as the result,
220 // then we first copy its contents and use that instead while formatting.
221 UnicodeString resultCopy;
222 if (getArgumentLimit(cp, cpLength) > 0) {
223 for (int32_t i = 1; i < cpLength;) {
224 int32_t n = cp[i++];
225 if (n < ARG_NUM_LIMIT) {
226 if (values[n] == &result) {
227 if (i == 2) {
228 firstArg = n;
229 } else if (resultCopy.isEmpty() && !result.isEmpty()) {
230 resultCopy = result;
231 }
232 }
233 } else {
234 i += n - ARG_NUM_LIMIT;
235 }
236 }
237 }
238 if (firstArg < 0) {
239 result.remove();
240 }
241 return format(cp, cpLength, values,
242 result, &resultCopy, FALSE,
243 offsets, offsetsLength, errorCode);
244 }
245
246 UnicodeString SimpleFormatter::getTextWithNoArguments(
247 const UChar *compiledPattern, int32_t compiledPatternLength) {
248 int32_t capacity = compiledPatternLength - 1 -
249 getArgumentLimit(compiledPattern, compiledPatternLength);
250 UnicodeString sb(capacity, 0, 0); // Java: StringBuilder
251 for (int32_t i = 1; i < compiledPatternLength;) {
252 int32_t segmentLength = compiledPattern[i++] - ARG_NUM_LIMIT;
253 if (segmentLength > 0) {
254 sb.append(compiledPattern + i, segmentLength);
255 i += segmentLength;
256 }
257 }
258 return sb;
259 }
260
261 UnicodeString &SimpleFormatter::format(
262 const UChar *compiledPattern, int32_t compiledPatternLength,
263 const UnicodeString *const *values,
264 UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
265 int32_t *offsets, int32_t offsetsLength,
266 UErrorCode &errorCode) {
267 if (U_FAILURE(errorCode)) {
268 return result;
269 }
270 for (int32_t i = 0; i < offsetsLength; i++) {
271 offsets[i] = -1;
272 }
273 for (int32_t i = 1; i < compiledPatternLength;) {
274 int32_t n = compiledPattern[i++];
275 if (n < ARG_NUM_LIMIT) {
276 const UnicodeString *value = values[n];
277 if (value == NULL) {
278 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
279 return result;
280 }
281 if (value == &result) {
282 if (forbidResultAsValue) {
283 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
284 return result;
285 }
286 if (i == 2) {
287 // We are appending to result which is also the first value object.
288 if (n < offsetsLength) {
289 offsets[n] = 0;
290 }
291 } else {
292 if (n < offsetsLength) {
293 offsets[n] = result.length();
294 }
295 result.append(*resultCopy);
296 }
297 } else {
298 if (n < offsetsLength) {
299 offsets[n] = result.length();
300 }
301 result.append(*value);
302 }
303 } else {
304 int32_t length = n - ARG_NUM_LIMIT;
305 result.append(compiledPattern + i, length);
306 i += length;
307 }
308 }
309 return result;
310 }
311
312 U_NAMESPACE_END