/*
******************************************************************************
* Copyright (C) 2014-2016, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
*/
9 #include "unicode/utypes.h"
10 #include "unicode/simpleformatter.h"
11 #include "unicode/unistr.h"
/**
 * Argument numbers must be smaller than this limit.
 * Text segment lengths are offset by this much.
 * This is currently the only unused char value in compiled patterns,
 * except it is the maximum value of the first unit (max arg +1).
 */
const int32_t ARG_NUM_LIMIT = 0x100;
26 * Initial and maximum char/UChar value set for a text segment.
27 * Segment length char values are from ARG_NUM_LIMIT+1 to this value here.
28 * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing.
30 const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR
= 0xffff;
32 * Maximum length of a text segment. Longer segments are split into shorter ones.
34 const int32_t MAX_SEGMENT_LENGTH
= SEGMENT_LENGTH_PLACEHOLDER_CHAR
- ARG_NUM_LIMIT
;
45 inline UBool
isInvalidArray(const void *array
, int32_t length
) {
46 return (length
< 0 || (array
== NULL
&& length
!= 0));
51 SimpleFormatter
&SimpleFormatter::operator=(const SimpleFormatter
& other
) {
55 compiledPattern
= other
.compiledPattern
;
59 SimpleFormatter::~SimpleFormatter() {}
61 UBool
SimpleFormatter::applyPatternMinMaxArguments(
62 const UnicodeString
&pattern
,
63 int32_t min
, int32_t max
,
64 UErrorCode
&errorCode
) {
65 if (U_FAILURE(errorCode
)) {
68 // Parse consistent with MessagePattern, but
69 // - support only simple numbered arguments
70 // - build a simple binary structure into the result string
71 const UChar
*patternBuffer
= pattern
.getBuffer();
72 int32_t patternLength
= pattern
.length();
73 // Reserve the first char for the number of arguments.
74 compiledPattern
.setTo((UChar
)0);
75 int32_t textLength
= 0;
77 UBool inQuote
= FALSE
;
78 for (int32_t i
= 0; i
< patternLength
;) {
79 UChar c
= patternBuffer
[i
++];
81 if (i
< patternLength
&& (c
= patternBuffer
[i
]) == APOS
) {
82 // double apostrophe, skip the second one
85 // skip the quote-ending apostrophe
88 } else if (c
== OPEN_BRACE
|| c
== CLOSE_BRACE
) {
89 // Skip the quote-starting apostrophe, find the end of the quoted literal text.
93 // The apostrophe is part of literal text.
96 } else if (!inQuote
&& c
== OPEN_BRACE
) {
98 compiledPattern
.setCharAt(compiledPattern
.length() - textLength
- 1,
99 (UChar
)(ARG_NUM_LIMIT
+ textLength
));
103 if ((i
+ 1) < patternLength
&&
104 0 <= (argNumber
= patternBuffer
[i
] - DIGIT_ZERO
) && argNumber
<= 9 &&
105 patternBuffer
[i
+ 1] == CLOSE_BRACE
) {
108 // Multi-digit argument number (no leading zero) or syntax error.
109 // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index)
110 // around the number, but this class does not.
112 if (i
< patternLength
&& DIGIT_ONE
<= (c
= patternBuffer
[i
++]) && c
<= DIGIT_NINE
) {
113 argNumber
= c
- DIGIT_ZERO
;
114 while (i
< patternLength
&&
115 DIGIT_ZERO
<= (c
= patternBuffer
[i
++]) && c
<= DIGIT_NINE
) {
116 argNumber
= argNumber
* 10 + (c
- DIGIT_ZERO
);
117 if (argNumber
>= ARG_NUM_LIMIT
) {
122 if (argNumber
< 0 || c
!= CLOSE_BRACE
) {
123 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
127 if (argNumber
> maxArg
) {
130 compiledPattern
.append((UChar
)argNumber
);
132 } // else: c is part of literal text
133 // Append c and track the literal-text segment length.
134 if (textLength
== 0) {
135 // Reserve a char for the length of a new text segment, preset the maximum length.
136 compiledPattern
.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR
);
138 compiledPattern
.append(c
);
139 if (++textLength
== MAX_SEGMENT_LENGTH
) {
143 if (textLength
> 0) {
144 compiledPattern
.setCharAt(compiledPattern
.length() - textLength
- 1,
145 (UChar
)(ARG_NUM_LIMIT
+ textLength
));
147 int32_t argCount
= maxArg
+ 1;
148 if (argCount
< min
|| max
< argCount
) {
149 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
152 compiledPattern
.setCharAt(0, (UChar
)argCount
);
156 UnicodeString
& SimpleFormatter::format(
157 const UnicodeString
&value0
,
158 UnicodeString
&appendTo
, UErrorCode
&errorCode
) const {
159 const UnicodeString
*values
[] = { &value0
};
160 return formatAndAppend(values
, 1, appendTo
, NULL
, 0, errorCode
);
163 UnicodeString
& SimpleFormatter::format(
164 const UnicodeString
&value0
,
165 const UnicodeString
&value1
,
166 UnicodeString
&appendTo
, UErrorCode
&errorCode
) const {
167 const UnicodeString
*values
[] = { &value0
, &value1
};
168 return formatAndAppend(values
, 2, appendTo
, NULL
, 0, errorCode
);
171 UnicodeString
& SimpleFormatter::format(
172 const UnicodeString
&value0
,
173 const UnicodeString
&value1
,
174 const UnicodeString
&value2
,
175 UnicodeString
&appendTo
, UErrorCode
&errorCode
) const {
176 const UnicodeString
*values
[] = { &value0
, &value1
, &value2
};
177 return formatAndAppend(values
, 3, appendTo
, NULL
, 0, errorCode
);
180 UnicodeString
& SimpleFormatter::formatAndAppend(
181 const UnicodeString
*const *values
, int32_t valuesLength
,
182 UnicodeString
&appendTo
,
183 int32_t *offsets
, int32_t offsetsLength
, UErrorCode
&errorCode
) const {
184 if (U_FAILURE(errorCode
)) {
187 if (isInvalidArray(values
, valuesLength
) || isInvalidArray(offsets
, offsetsLength
) ||
188 valuesLength
< getArgumentLimit()) {
189 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
192 return format(compiledPattern
.getBuffer(), compiledPattern
.length(), values
,
193 appendTo
, NULL
, TRUE
,
194 offsets
, offsetsLength
, errorCode
);
197 UnicodeString
&SimpleFormatter::formatAndReplace(
198 const UnicodeString
*const *values
, int32_t valuesLength
,
199 UnicodeString
&result
,
200 int32_t *offsets
, int32_t offsetsLength
, UErrorCode
&errorCode
) const {
201 if (U_FAILURE(errorCode
)) {
204 if (isInvalidArray(values
, valuesLength
) || isInvalidArray(offsets
, offsetsLength
)) {
205 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
208 const UChar
*cp
= compiledPattern
.getBuffer();
209 int32_t cpLength
= compiledPattern
.length();
210 if (valuesLength
< getArgumentLimit(cp
, cpLength
)) {
211 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
215 // If the pattern starts with an argument whose value is the same object
216 // as the result, then we keep the result contents and append to it.
217 // Otherwise we replace its contents.
218 int32_t firstArg
= -1;
219 // If any non-initial argument value is the same object as the result,
220 // then we first copy its contents and use that instead while formatting.
221 UnicodeString resultCopy
;
222 if (getArgumentLimit(cp
, cpLength
) > 0) {
223 for (int32_t i
= 1; i
< cpLength
;) {
225 if (n
< ARG_NUM_LIMIT
) {
226 if (values
[n
] == &result
) {
229 } else if (resultCopy
.isEmpty() && !result
.isEmpty()) {
234 i
+= n
- ARG_NUM_LIMIT
;
241 return format(cp
, cpLength
, values
,
242 result
, &resultCopy
, FALSE
,
243 offsets
, offsetsLength
, errorCode
);
246 UnicodeString
SimpleFormatter::getTextWithNoArguments(
247 const UChar
*compiledPattern
, int32_t compiledPatternLength
) {
248 int32_t capacity
= compiledPatternLength
- 1 -
249 getArgumentLimit(compiledPattern
, compiledPatternLength
);
250 UnicodeString
sb(capacity
, 0, 0); // Java: StringBuilder
251 for (int32_t i
= 1; i
< compiledPatternLength
;) {
252 int32_t segmentLength
= compiledPattern
[i
++] - ARG_NUM_LIMIT
;
253 if (segmentLength
> 0) {
254 sb
.append(compiledPattern
+ i
, segmentLength
);
261 UnicodeString
&SimpleFormatter::format(
262 const UChar
*compiledPattern
, int32_t compiledPatternLength
,
263 const UnicodeString
*const *values
,
264 UnicodeString
&result
, const UnicodeString
*resultCopy
, UBool forbidResultAsValue
,
265 int32_t *offsets
, int32_t offsetsLength
,
266 UErrorCode
&errorCode
) {
267 if (U_FAILURE(errorCode
)) {
270 for (int32_t i
= 0; i
< offsetsLength
; i
++) {
273 for (int32_t i
= 1; i
< compiledPatternLength
;) {
274 int32_t n
= compiledPattern
[i
++];
275 if (n
< ARG_NUM_LIMIT
) {
276 const UnicodeString
*value
= values
[n
];
278 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
281 if (value
== &result
) {
282 if (forbidResultAsValue
) {
283 errorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
287 // We are appending to result which is also the first value object.
288 if (n
< offsetsLength
) {
292 if (n
< offsetsLength
) {
293 offsets
[n
] = result
.length();
295 result
.append(*resultCopy
);
298 if (n
< offsetsLength
) {
299 offsets
[n
] = result
.length();
301 result
.append(*value
);
304 int32_t length
= n
- ARG_NUM_LIMIT
;
305 result
.append(compiledPattern
+ i
, length
);