1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
5 * Copyright (C) 2014-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
11 #include "unicode/utypes.h"
12 #include "unicode/simpleformatter.h"
13 #include "unicode/unistr.h"
21 * Argument numbers must be smaller than this limit.
22 * Text segment lengths are offset by this much.
23 * This is currently the only unused char value in compiled patterns,
24 * except it is the maximum value of the first unit (max arg +1).
26 const int32_t ARG_NUM_LIMIT
= 0x100;
28 * Initial and maximum char/UChar value set for a text segment.
29 * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
30 * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
32 const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR
= 0xffff;
34 * Maximum length of a text segment. Longer segments are split into shorter ones.
36 const int32_t MAX_SEGMENT_LENGTH
= SEGMENT_LENGTH_PLACEHOLDER_CHAR
- ARG_NUM_LIMIT
;
47 inline UBool
isInvalidArray(const void *array
, int32_t length
) {
48 return (length
< 0 || (array
== NULL
&& length
!= 0));
// Copy assignment operator: takes over other's compiled pattern.
// NOTE(review): this listing omits lines of the function (the embedded source
// line numbers jump from 53 to 57, and no return statement or closing brace is
// visible) -- confirm the full body against the complete source file.
53 SimpleFormatter
&SimpleFormatter::operator=(const SimpleFormatter
& other
) {
// The compiled pattern string is the only member assigned in the visible code.
57 compiledPattern
= other
.compiledPattern
;
61 SimpleFormatter::~SimpleFormatter() {}
// Parses `pattern` and rebuilds compiledPattern from it.
//
// Layout written by the visible code:
//   - unit 0: the argument count (maxArg + 1), stored at the very end;
//   - an argument reference: one unit holding the argument number;
//   - a literal text segment: one length unit (ARG_NUM_LIMIT + segment length)
//     followed by the segment's characters.
//
// pattern   - the pattern text to compile.
// min, max  - inclusive bounds for the argument count; a count outside this
//             range sets errorCode to U_ILLEGAL_ARGUMENT_ERROR.
// errorCode - in/out ICU error code; it is checked with U_FAILURE() on entry,
//             and set to U_ILLEGAL_ARGUMENT_ERROR for an invalid argument
//             number or a missing closing brace.
//
// NOTE(review): this listing omits a number of source lines (for example the
// declarations of maxArg and argNumber, all return statements, and most
// closing braces). The function returns a UBool -- presumably TRUE on success
// and FALSE on failure; confirm against the complete source.
63 UBool
SimpleFormatter::applyPatternMinMaxArguments(
64 const UnicodeString
&pattern
,
65 int32_t min
, int32_t max
,
66 UErrorCode
&errorCode
) {
67 if (U_FAILURE(errorCode
)) {
70 // Parse consistent with MessagePattern, but
71 // - support only simple numbered arguments
72 // - build a simple binary structure into the result string
73 const UChar
*patternBuffer
= pattern
.getBuffer();
74 int32_t patternLength
= pattern
.length();
75 // Reserve the first char for the number of arguments.
76 compiledPattern
.setTo((UChar
)0);
// Number of characters appended so far to the currently open text segment.
77 int32_t textLength
= 0;
79 UBool inQuote
= FALSE
;
// i is advanced inside the loop body, not in the for-header.
80 for (int32_t i
= 0; i
< patternLength
;) {
81 UChar c
= patternBuffer
[i
++];
// Peek at the following character (this also overwrites c with it).
83 if (i
< patternLength
&& (c
= patternBuffer
[i
]) == APOS
) {
84 // double apostrophe, skip the second one
87 // skip the quote-ending apostrophe
90 } else if (c
== OPEN_BRACE
|| c
== CLOSE_BRACE
) {
91 // Skip the quote-starting apostrophe, find the end of the quoted literal text.
95 // The apostrophe is part of literal text.
98 } else if (!inQuote
&& c
== OPEN_BRACE
) {
// Finish the pending text segment: overwrite its length unit, which sits
// textLength + 1 units before the end, with ARG_NUM_LIMIT + textLength.
100 compiledPattern
.setCharAt(compiledPattern
.length() - textLength
- 1,
101 (UChar
)(ARG_NUM_LIMIT
+ textLength
));
// Fast path: exactly one digit 0..9 immediately followed by a closing brace.
105 if ((i
+ 1) < patternLength
&&
106 0 <= (argNumber
= patternBuffer
[i
] - DIGIT_ZERO
) && argNumber
<= 9 &&
107 patternBuffer
[i
+ 1] == CLOSE_BRACE
) {
110 // Multi-digit argument number (no leading zero) or syntax error.
111 // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
112 // around the number, but this class does not.
// The first digit must be 1..9, which rules out a leading zero.
114 if (i
< patternLength
&& DIGIT_ONE
<= (c
= patternBuffer
[i
++]) && c
<= DIGIT_NINE
) {
115 argNumber
= c
- DIGIT_ZERO
;
// Accumulate further decimal digits; c ends up holding the first non-digit read.
116 while (i
< patternLength
&&
117 DIGIT_ZERO
<= (c
= patternBuffer
[i
++]) && c
<= DIGIT_NINE
) {
118 argNumber
= argNumber
* 10 + (c
- DIGIT_ZERO
);
// Argument numbers must stay below ARG_NUM_LIMIT.
119 if (argNumber
>= ARG_NUM_LIMIT
) {
// Reject a negative argument number or a character other than the closing brace.
124 if (argNumber
< 0 || c
!= CLOSE_BRACE
) {
125 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
// Compare with the largest argument number seen so far; maxArg + 1 later
// becomes the argument count.
129 if (argNumber
> maxArg
) {
// Emit the argument reference as a single unit.
132 compiledPattern
.append((UChar
)argNumber
);
134 } // else: c is part of literal text
135 // Append c and track the literal-text segment length.
136 if (textLength
== 0) {
137 // Reserve a char for the length of a new text segment, preset the maximum length.
138 compiledPattern
.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR
);
140 compiledPattern
.append(c
);
// Check whether the segment has reached MAX_SEGMENT_LENGTH (the body of this
// branch is not visible in this listing).
141 if (++textLength
== MAX_SEGMENT_LENGTH
) {
// After the loop: write the real length unit of a trailing text segment.
145 if (textLength
> 0) {
146 compiledPattern
.setCharAt(compiledPattern
.length() - textLength
- 1,
147 (UChar
)(ARG_NUM_LIMIT
+ textLength
));
149 int32_t argCount
= maxArg
+ 1;
// The argument count must lie within [min, max].
150 if (argCount
< min
|| max
< argCount
) {
151 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
// Store the argument count in the unit reserved at index 0.
154 compiledPattern
.setCharAt(0, (UChar
)argCount
);
158 UnicodeString
& SimpleFormatter::format(
159 const UnicodeString
&value0
,
160 UnicodeString
&appendTo
, UErrorCode
&errorCode
) const {
161 const UnicodeString
*values
[] = { &value0
};
162 return formatAndAppend(values
, 1, appendTo
, NULL
, 0, errorCode
);
165 UnicodeString
& SimpleFormatter::format(
166 const UnicodeString
&value0
,
167 const UnicodeString
&value1
,
168 UnicodeString
&appendTo
, UErrorCode
&errorCode
) const {
169 const UnicodeString
*values
[] = { &value0
, &value1
};
170 return formatAndAppend(values
, 2, appendTo
, NULL
, 0, errorCode
);
173 UnicodeString
& SimpleFormatter::format(
174 const UnicodeString
&value0
,
175 const UnicodeString
&value1
,
176 const UnicodeString
&value2
,
177 UnicodeString
&appendTo
, UErrorCode
&errorCode
) const {
178 const UnicodeString
*values
[] = { &value0
, &value1
, &value2
};
179 return formatAndAppend(values
, 3, appendTo
, NULL
, 0, errorCode
);
// Validates the arguments and then formats the compiled pattern through the
// static format() worker, passing appendTo as the result string.
//
// values / valuesLength   - array of pointers to the argument values; must be
//                           a valid array with at least getArgumentLimit() entries.
// appendTo                - string handed to the worker as the output.
// offsets / offsetsLength - optional array for per-argument offsets; must be
//                           a valid array (NULL is accepted only with length 0).
// errorCode               - in/out ICU error code; set to
//                           U_ILLEGAL_ARGUMENT_ERROR when validation fails.
//
// NOTE(review): the early-exit statements after the two checks are not visible
// in this listing -- confirm what is returned on failure.
182 UnicodeString
& SimpleFormatter::formatAndAppend(
183 const UnicodeString
*const *values
, int32_t valuesLength
,
184 UnicodeString
&appendTo
,
185 int32_t *offsets
, int32_t offsetsLength
, UErrorCode
&errorCode
) const {
186 if (U_FAILURE(errorCode
)) {
// Reject malformed arrays and value arrays shorter than the argument limit.
189 if (isInvalidArray(values
, valuesLength
) || isInvalidArray(offsets
, offsetsLength
) ||
190 valuesLength
< getArgumentLimit()) {
191 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
// Worker call: no result copy (NULL), and forbidResultAsValue is TRUE.
194 return format(compiledPattern
.getBuffer(), compiledPattern
.length(), values
,
195 appendTo
, NULL
, TRUE
,
196 offsets
, offsetsLength
, errorCode
);
// Formats the compiled pattern into `result` through the static format()
// worker, allowing `result` itself to appear among the argument values.
//
// values / valuesLength   - array of pointers to the argument values; must be
//                           a valid array with at least
//                           getArgumentLimit(cp, cpLength) entries.
// result                  - output string; it is compared by address against
//                           the entries of values.
// offsets / offsetsLength - optional array for per-argument offsets; must be
//                           a valid array (NULL is accepted only with length 0).
// errorCode               - in/out ICU error code; set to
//                           U_ILLEGAL_ARGUMENT_ERROR when validation fails.
//
// NOTE(review): this listing omits several lines of the function (for example
// the declaration of n, the bodies of the branches inside the loop, and the
// early-exit statements) -- confirm details against the complete source.
199 UnicodeString
&SimpleFormatter::formatAndReplace(
200 const UnicodeString
*const *values
, int32_t valuesLength
,
201 UnicodeString
&result
,
202 int32_t *offsets
, int32_t offsetsLength
, UErrorCode
&errorCode
) const {
203 if (U_FAILURE(errorCode
)) {
206 if (isInvalidArray(values
, valuesLength
) || isInvalidArray(offsets
, offsetsLength
)) {
207 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
210 const UChar
*cp
= compiledPattern
.getBuffer();
211 int32_t cpLength
= compiledPattern
.length();
// There must be a value for every argument the pattern can reference.
212 if (valuesLength
< getArgumentLimit(cp
, cpLength
)) {
213 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
217 // If the pattern starts with an argument whose value is the same object
218 // as the result, then we keep the result contents and append to it.
219 // Otherwise we replace its contents.
220 int32_t firstArg
= -1;
221 // If any non-initial argument value is the same object as the result,
222 // then we first copy its contents and use that instead while formatting.
223 UnicodeString resultCopy
;
// Scan the compiled pattern only when it has at least one argument.
224 if (getArgumentLimit(cp
, cpLength
) > 0) {
// Index 0 holds the argument count, so scanning starts at index 1.
225 for (int32_t i
= 1; i
< cpLength
;) {
// Units below ARG_NUM_LIMIT are argument numbers.
227 if (n
< ARG_NUM_LIMIT
) {
// Detect an argument whose value is the result object itself.
228 if (values
[n
] == &result
) {
231 } else if (resultCopy
.isEmpty() && !result
.isEmpty()) {
// Text segment: skip its characters; the length is n - ARG_NUM_LIMIT.
236 i
+= n
- ARG_NUM_LIMIT
;
// Worker call: pass the copy of result, and forbidResultAsValue is FALSE.
243 return format(cp
, cpLength
, values
,
244 result
, &resultCopy
, FALSE
,
245 offsets
, offsetsLength
, errorCode
);
// Builds a string from the literal text segments of a compiled pattern and
// records, for argument numbers that fit in the offsets array, the output
// length at the point where the argument occurs.
//
// compiledPattern / compiledPatternLength - the compiled pattern units.
// offsetsLength                           - number of entries in offsets.
//
// NOTE(review): this listing omits lines of the function (the embedded source
// line numbers skip 251, 254-255, 262, 264 and everything after 266). The
// declaration of the `offsets` parameter, the body of the first loop, and the
// return statement are not visible -- confirm against the complete source.
248 UnicodeString
SimpleFormatter::getTextWithNoArguments(
249 const UChar
*compiledPattern
,
250 int32_t compiledPatternLength
,
252 int32_t offsetsLength
) {
// First pass over every offsets slot (loop body not visible in this listing).
253 for (int32_t i
= 0; i
< offsetsLength
; i
++) {
// Capacity estimate: pattern length minus the first unit and minus the argument limit.
256 int32_t capacity
= compiledPatternLength
- 1 -
257 getArgumentLimit(compiledPattern
, compiledPatternLength
);
258 UnicodeString
sb(capacity
, 0, 0); // Java: StringBuilder
// Index 0 is skipped; scanning of the units starts at index 1.
259 for (int32_t i
= 1; i
< compiledPatternLength
;) {
260 int32_t n
= compiledPattern
[i
++];
// Units greater than ARG_NUM_LIMIT introduce a literal text segment.
261 if (n
> ARG_NUM_LIMIT
) {
263 sb
.append(compiledPattern
+ i
, n
);
// Otherwise n is treated as an argument number; record where it falls in the
// text when the offsets array has a slot for it.
265 } else if (n
< offsetsLength
) {
266 offsets
[n
] = sb
.length();
// Worker that walks a compiled pattern and appends to `result` either the
// value of each referenced argument or the literal text of each segment.
//
// compiledPattern / compiledPatternLength - the compiled pattern units;
//                           scanning starts at index 1.
// values                  - array of pointers to the argument values, indexed
//                           by argument number.
// result                  - string that receives the output via append().
// resultCopy              - appended in place of an argument value in one of
//                           the branches taken when that value is `result`
//                           itself.
// forbidResultAsValue     - when set, an argument value that is `result`
//                           itself sets errorCode to U_ILLEGAL_ARGUMENT_ERROR.
// offsets / offsetsLength - for argument numbers below offsetsLength, the
//                           length of `result` at that point is stored in
//                           offsets[n].
// errorCode               - in/out ICU error code.
//
// NOTE(review): this listing omits lines of the function (for example some
// branch conditions and bodies, the advance of i past a text segment, the
// return statement, and the closing braces) -- confirm against the complete
// source.
272 UnicodeString
&SimpleFormatter::format(
273 const UChar
*compiledPattern
, int32_t compiledPatternLength
,
274 const UnicodeString
*const *values
,
275 UnicodeString
&result
, const UnicodeString
*resultCopy
, UBool forbidResultAsValue
,
276 int32_t *offsets
, int32_t offsetsLength
,
277 UErrorCode
&errorCode
) {
278 if (U_FAILURE(errorCode
)) {
// First pass over every offsets slot (loop body not visible in this listing).
281 for (int32_t i
= 0; i
< offsetsLength
; i
++) {
// Index 0 is skipped; scanning of the units starts at index 1.
284 for (int32_t i
= 1; i
< compiledPatternLength
;) {
285 int32_t n
= compiledPattern
[i
++];
// Units below ARG_NUM_LIMIT are argument numbers.
286 if (n
< ARG_NUM_LIMIT
) {
287 const UnicodeString
*value
= values
[n
];
// NOTE(review): the condition guarding this error assignment is not visible
// in this listing.
289 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
// The argument value is the very object being written to.
292 if (value
== &result
) {
293 if (forbidResultAsValue
) {
294 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
298 // We are appending to result which is also the first value object.
299 if (n
< offsetsLength
) {
// Record the offset, then append the copy of result in place of the value.
303 if (n
< offsetsLength
) {
304 offsets
[n
] = result
.length();
306 result
.append(*resultCopy
);
// Ordinary value: record the offset, then append the value.
309 if (n
< offsetsLength
) {
310 offsets
[n
] = result
.length();
312 result
.append(*value
);
// Text segment: its length is the unit value minus ARG_NUM_LIMIT.
315 int32_t length
= n
- ARG_NUM_LIMIT
;
316 result
.append(compiledPattern
+ i
, length
);