1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 #include "formatted_string_builder.h"
9 #include "unicode/ustring.h"
10 #include "unicode/utf16.h"
// Copies `len` bytes from `src` to `dest` by forwarding to uprv_memcpy.
// NOTE(review): the zero-length check described in the comment below is not
// visible in this listing; confirm the call is guarded before passing len == 0.
14 // A version of uprv_memcpy that checks for length 0.
15 // By default, uprv_memcpy requires a length of at least 1.
16 inline void uprv_memcpy2(void* dest
, const void* src
, size_t len
) {
18 uprv_memcpy(dest
, src
, len
);
// Moves `len` bytes from `src` to `dest` by forwarding to uprv_memmove.
// NOTE(review): the zero-length check described in the comment below is not
// visible in this listing; confirm the call is guarded before passing len == 0.
22 // A version of uprv_memmove that checks for length 0.
23 // By default, uprv_memmove requires a length of at least 1.
24 inline void uprv_memmove2(void* dest
, const void* src
, size_t len
) {
26 uprv_memmove(dest
, src
, len
);
// Default constructor. The visible body iterates i over [0, getCapacity()).
// NOTE(review): the loop body and any debug-only guard are not visible in this
// listing; per the comment below it presumably writes a non-zero fill value
// into the buffer — confirm.
35 FormattedStringBuilder::FormattedStringBuilder() {
37 // Initializing the memory to non-zero helps catch some bugs that involve
38 // reading from an improperly terminated string.
39 for (int32_t i
=0; i
<getCapacity(); i
++) {
// Destructor: releases the character and field heap buffers with uprv_free.
// NOTE(review): no test for whether heap storage is actually in use is visible
// in this listing; confirm the frees are guarded, since fChars/fFields appear
// to be unions with a `.heap` member.
45 FormattedStringBuilder::~FormattedStringBuilder() {
47 uprv_free(fChars
.heap
.ptr
);
48 uprv_free(fFields
.heap
.ptr
);
// Copy constructor taking the source builder `other` by const reference.
// NOTE(review): the body is not visible in this listing; presumably it
// delegates to the copy-assignment operator — confirm.
52 FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder
&other
) {
// Copy assignment. Releases this object's heap buffers, then copies the
// character storage, field storage and length from `other`.
//
// When other's capacity exceeds DEFAULT_CAPACITY, fresh char16_t and Field
// arrays of that capacity are obtained from uprv_malloc and installed as the
// heap buffers before copying. There is no UErrorCode parameter, so an
// allocation failure cannot be reported; the object is instead reset to a
// default-constructed builder.
//
// NOTE(review): the self-assignment test, any "is heap in use" guard around the
// frees, the cleanup of a partially successful allocation, the copy of fZero and
// the return statement are not visible in this listing — confirm.
56 FormattedStringBuilder
&FormattedStringBuilder::operator=(const FormattedStringBuilder
&other
) {
57 // Check for self-assignment
62 // Continue with deallocation and copying
64 uprv_free(fChars
.heap
.ptr
);
65 uprv_free(fFields
.heap
.ptr
);
// Size the destination to match the source's capacity, not just its length.
69 int32_t capacity
= other
.getCapacity();
70 if (capacity
> DEFAULT_CAPACITY
) {
72 // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
73 auto newChars
= static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity
));
74 auto newFields
= static_cast<Field
*>(uprv_malloc(sizeof(Field
) * capacity
));
// Either allocation failing is treated as a total failure.
75 if (newChars
== nullptr || newFields
== nullptr) {
76 // UErrorCode is not available; fail silently.
79 *this = FormattedStringBuilder(); // can't fail
// Adopt the newly allocated buffers and record their capacity.
84 fChars
.heap
.capacity
= capacity
;
85 fChars
.heap
.ptr
= newChars
;
86 fFields
.heap
.capacity
= capacity
;
87 fFields
.heap
.ptr
= newFields
;
// Copy the entire capacity of both arrays (not only fLength elements).
90 uprv_memcpy2(getCharPtr(), other
.getCharPtr(), sizeof(char16_t) * capacity
);
91 uprv_memcpy2(getFieldPtr(), other
.getFieldPtr(), sizeof(Field
) * capacity
);
94 fLength
= other
.fLength
;
// Const accessor returning an int32_t length.
// NOTE(review): the body is not visible in this listing; presumably it returns
// fLength (the count of UTF-16 code units) — confirm.
98 int32_t FormattedStringBuilder::length() const {
// Returns the result of u_countChar32 over the fLength UTF-16 code units that
// start at getCharPtr() + fZero, i.e. the number of code points in the string.
102 int32_t FormattedStringBuilder::codePointCount() const {
103 return u_countChar32(getCharPtr() + fZero
, fLength
);
// Reads the code point at offset 0 of the string into `cp` using U16_GET, with
// the string bounded to [0, fLength) relative to getCharPtr() + fZero.
// NOTE(review): the declaration of cp, any empty-string handling and the
// return statement are not visible in this listing — confirm.
106 UChar32
FormattedStringBuilder::getFirstCodePoint() const {
111 U16_GET(getCharPtr() + fZero
, 0, 0, fLength
, cp
);
// Reads the last code point of the string into `cp`.
// NOTE(review): the declaration of cp, any empty-string handling and the
// return statement are not visible in this listing — confirm.
115 UChar32
FormattedStringBuilder::getLastCodePoint() const {
// Start one past the end, then step back one code point (U16_BACK_1 modifies
// `offset` in place) so that offset indexes the start of the last code point.
119 int32_t offset
= fLength
;
120 U16_BACK_1(getCharPtr() + fZero
, 0, offset
);
122 U16_GET(getCharPtr() + fZero
, 0, offset
, fLength
, cp
);
// Reads the code point that contains the code unit at `index` (relative to the
// start of the string) into `cp` using U16_GET, bounded to [0, fLength).
// NOTE(review): the declaration of cp and the return statement are not visible
// in this listing; no range check on `index` is visible either — confirm.
126 UChar32
FormattedStringBuilder::codePointAt(int32_t index
) const {
128 U16_GET(getCharPtr() + fZero
, 0, index
, fLength
, cp
);
// Reads the code point that ends immediately before `index` into `cp`.
// NOTE(review): the declaration of cp and the return statement are not visible
// in this listing; no range check on `index` is visible either — confirm.
132 UChar32
FormattedStringBuilder::codePointBefore(int32_t index
) const {
// Work on a copy of index: U16_BACK_1 moves `offset` back by one code point.
133 int32_t offset
= index
;
134 U16_BACK_1(getCharPtr() + fZero
, 0, offset
);
136 U16_GET(getCharPtr() + fZero
, 0, offset
, fLength
, cp
);
// Resets the builder; the visible statement moves the zero index (fZero) to the
// midpoint of the current capacity. The existing buffers are kept (see TODO).
// NOTE(review): the reset of fLength and the return statement are not visible
// in this listing — confirm.
140 FormattedStringBuilder
&FormattedStringBuilder::clear() {
141 // TODO: Reset the heap here?
142 fZero
= getCapacity() / 2;
// Inserts a single code point, tagged with `field`, at string offset `index`.
//
// Parameters:
//   index     - insertion offset, forwarded to prepareForInsert.
//   codePoint - the code point to insert; occupies U16_LENGTH(codePoint) units.
//   field     - the Field value stored for every inserted code unit.
//   status    - ICU error code; checked with U_FAILURE after reserving space.
//
// NOTE(review): the return type, the condition selecting between the one-unit
// and two-unit branches, the failure-path return and the final return value
// are not visible in this listing — confirm.
148 FormattedStringBuilder::insertCodePoint(int32_t index
, UChar32 codePoint
, Field field
, UErrorCode
&status
) {
// Number of UTF-16 code units needed for this code point.
149 int32_t count
= U16_LENGTH(codePoint
);
// Reserve `count` slots at `index`; `position` is an index into the raw arrays.
150 int32_t position
= prepareForInsert(index
, count
, status
);
151 if (U_FAILURE(status
)) {
// Single code unit: store the code point directly.
155 getCharPtr()[position
] = (char16_t) codePoint
;
156 getFieldPtr()[position
] = field
;
// Two code units: store lead and trail surrogates, both tagged with `field`.
158 getCharPtr()[position
] = U16_LEAD(codePoint
);
159 getCharPtr()[position
+ 1] = U16_TRAIL(codePoint
);
160 getFieldPtr()[position
] = getFieldPtr()[position
+ 1] = field
;
// Inserts the whole of `unistr`, tagged with `field`, at string offset `index`.
// Dispatches on the length of `unistr`:
//   length 0 - nothing is inserted;
//   length 1 - the single code unit is passed to insertCodePoint;
//   otherwise - forwards to the ranged insert overload with [0, length()).
// NOTE(review): the value returned in the empty case and the `else` that
// introduces the final return are not visible in this listing — confirm.
165 int32_t FormattedStringBuilder::insert(int32_t index
, const UnicodeString
&unistr
, Field field
,
166 UErrorCode
&status
) {
167 if (unistr
.length() == 0) {
168 // Nothing to insert.
170 } else if (unistr
.length() == 1) {
171 // Fast path: insert using insertCodePoint.
172 return insertCodePoint(index
, unistr
.charAt(0), field
, status
);
174 return insert(index
, unistr
, 0, unistr
.length(), field
, status
);
// Inserts the code units unistr[start, end), each tagged with `field`, at
// string offset `index`.
//
// Parameters:
//   index      - insertion offset, forwarded to prepareForInsert.
//   unistr     - source string.
//   start, end - half-open range of code units in `unistr` to copy.
//   field      - the Field value stored for every inserted code unit.
//   status     - ICU error code; checked with U_FAILURE after reserving space.
//
// NOTE(review): the return type, the failure-path return and the final return
// value are not visible in this listing; no validation of start/end against
// unistr.length() is visible either — confirm.
179 FormattedStringBuilder::insert(int32_t index
, const UnicodeString
&unistr
, int32_t start
, int32_t end
,
180 Field field
, UErrorCode
&status
) {
181 int32_t count
= end
- start
;
// Reserve `count` slots at `index`; `position` is an index into the raw arrays.
182 int32_t position
= prepareForInsert(index
, count
, status
);
183 if (U_FAILURE(status
)) {
// Copy one code unit at a time and tag each with the same field.
186 for (int32_t i
= 0; i
< count
; i
++) {
187 getCharPtr()[position
+ i
] = unistr
.charAt(start
+ i
);
188 getFieldPtr()[position
+ i
] = field
;
// Replaces the range [startThis, endThis) of this string with the code units
// unistr[startOther, endOther), each tagged with `field`.
//
// `count` is the net change in length (replacement length minus replaced
// length). Per the comments below, one branch grows the string at startThis via
// prepareForInsert(count) and the other shrinks it via remove(-count); the
// replacement is then written over otherLength slots starting at `position`.
//
// NOTE(review): the return type, the declaration of `position`, the condition
// selecting between the two branches, the failure-path return and the final
// return value are not visible in this listing — confirm.
194 FormattedStringBuilder::splice(int32_t startThis
, int32_t endThis
, const UnicodeString
&unistr
,
195 int32_t startOther
, int32_t endOther
, Field field
, UErrorCode
& status
) {
196 int32_t thisLength
= endThis
- startThis
;
197 int32_t otherLength
= endOther
- startOther
;
198 int32_t count
= otherLength
- thisLength
;
201 // Overall, chars need to be added.
202 position
= prepareForInsert(startThis
, count
, status
);
204 // Overall, chars need to be removed or kept the same.
205 position
= remove(startThis
, -count
);
207 if (U_FAILURE(status
)) {
// Overwrite the (now correctly sized) gap with the replacement text.
210 for (int32_t i
= 0; i
< otherLength
; i
++) {
211 getCharPtr()[position
+ i
] = unistr
.charAt(startOther
+ i
);
212 getFieldPtr()[position
+ i
] = field
;
// Appends the contents of `other` to the end of this builder by inserting at
// offset fLength; returns whatever the insert overload returns and reports
// errors through `status`.
217 int32_t FormattedStringBuilder::append(const FormattedStringBuilder
&other
, UErrorCode
&status
) {
218 return insert(fLength
, other
, status
);
// Inserts the characters and fields of another builder at string offset `index`.
//
// Inserting a builder into itself is rejected: `status` is set to
// U_ILLEGAL_ARGUMENT_ERROR. Otherwise other.fLength slots are reserved with
// prepareForInsert and filled element by element from other.charAt(i) and
// other.fieldAt(i).
//
// NOTE(review): the return type, the returns on the self-insert / empty /
// failure paths, the condition guarding "Nothing to insert" and the final
// return value are not visible in this listing — confirm.
222 FormattedStringBuilder::insert(int32_t index
, const FormattedStringBuilder
&other
, UErrorCode
&status
) {
223 if (this == &other
) {
224 status
= U_ILLEGAL_ARGUMENT_ERROR
;
227 int32_t count
= other
.fLength
;
229 // Nothing to insert.
// Reserve `count` slots at `index`; `position` is an index into the raw arrays.
232 int32_t position
= prepareForInsert(index
, count
, status
);
233 if (U_FAILURE(status
)) {
// Copy both the code unit and its field for every position of `other`.
236 for (int32_t i
= 0; i
< count
; i
++) {
237 getCharPtr()[position
+ i
] = other
.charAt(i
);
238 getFieldPtr()[position
+ i
] = other
.fieldAt(i
);
// Reserves one slot at the end of the string (offset fLength) and writes a
// zero code unit there, with its field set to UNUM_FIELD_COUNT.
// Errors from reserving the slot are reported through `status`.
// NOTE(review): the failure-path return, and whether fLength is adjusted
// afterwards so the terminator is not counted, are not visible in this
// listing — confirm.
243 void FormattedStringBuilder::writeTerminator(UErrorCode
& status
) {
244 int32_t position
= prepareForInsert(fLength
, 1, status
);
245 if (U_FAILURE(status
)) {
248 getCharPtr()[position
] = 0;
249 getFieldPtr()[position
] = UNUM_FIELD_COUNT
;
// Makes room for `count` new code units at string offset `index` and returns
// an index into the raw char/field arrays (callers use it directly with
// getCharPtr()/getFieldPtr()).
//
// Preconditions (asserted): 0 <= index <= fLength and count >= 0.
//
// Three cases are visible:
//   1. index == 0 and there are at least `count` free slots before fZero;
//   2. index == fLength and the string plus `count` fits strictly below
//      getCapacity();
//   3. anything else is delegated to prepareForInsertHelper, which may move
//      data and/or allocate.
//
// NOTE(review): the bodies of cases 1 and 2 (the updates to fZero / fLength and
// the return for case 1) are not visible in this listing. The case 2 return
// value `fZero + fLength - count` is the old end of the string only if fLength
// has already been increased by `count` — confirm.
// NOTE(review): case 2 uses a strict `<`, so an insertion that would exactly
// fill the buffer is routed to the helper rather than handled in place.
253 int32_t FormattedStringBuilder::prepareForInsert(int32_t index
, int32_t count
, UErrorCode
&status
) {
254 U_ASSERT(index
>= 0);
255 U_ASSERT(index
<= fLength
);
256 U_ASSERT(count
>= 0);
257 if (index
== 0 && fZero
- count
>= 0) {
262 } else if (index
== fLength
&& fZero
+ fLength
+ count
< getCapacity()) {
265 return fZero
+ fLength
- count
;
267 // Move chars around and/or allocate more space
268 return prepareForInsertHelper(index
, count
, status
);
// Slow path of prepareForInsert: opens a gap of `count` slots at string offset
// `index`, either by reallocating or by shifting data inside the existing
// buffers, and returns fZero + index (the raw-array index of the gap).
//
// Growing path (fLength + count > current capacity):
//   - new capacity is twice the required length;
//   - new char16_t and Field arrays are obtained from uprv_malloc;
//   - on allocation failure `status` is set to U_MEMORY_ALLOCATION_ERROR;
//   - the prefix [0, index) and suffix [index, fLength) are copied separately so
//     that `count` slots are left free between them;
//   - the new arrays are installed as the heap buffers.
// In-place path:
//   - the whole string is first moved so it starts at the new zero index, then
//     the suffix is moved right by `count` slots. memmove is used because the
//     regions may overlap.
// In both paths the new zero index places the grown string in the middle of
// the buffer: capacity / 2 - (fLength + count) / 2.
//
// NOTE(review): `(fLength + count) * 2` is evaluated in int32_t and no overflow
// check is visible in this listing.
// NOTE(review): the free of newChars on failure, the failure-path return, the
// guard around freeing the old buffers, the `else` separating the two paths,
// and the final updates to fZero / fLength are not visible in this listing —
// confirm.
272 int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index
, int32_t count
, UErrorCode
&status
) {
// Snapshot the current layout before anything is moved or replaced.
273 int32_t oldCapacity
= getCapacity();
274 int32_t oldZero
= fZero
;
275 char16_t *oldChars
= getCharPtr();
276 Field
*oldFields
= getFieldPtr();
277 if (fLength
+ count
> oldCapacity
) {
278 int32_t newCapacity
= (fLength
+ count
) * 2;
279 int32_t newZero
= newCapacity
/ 2 - (fLength
+ count
) / 2;
281 // C++ note: malloc appears in two places: here and in the assignment operator.
282 auto newChars
= static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity
));
283 auto newFields
= static_cast<Field
*>(uprv_malloc(sizeof(Field
) * newCapacity
));
// Either allocation failing is treated as a total failure.
284 if (newChars
== nullptr || newFields
== nullptr) {
286 uprv_free(newFields
);
287 status
= U_MEMORY_ALLOCATION_ERROR
;
291 // First copy the prefix and then the suffix, leaving room for the new chars that the
292 // caller wants to insert.
293 // C++ note: memcpy is OK because the src and dest do not overlap.
294 uprv_memcpy2(newChars
+ newZero
, oldChars
+ oldZero
, sizeof(char16_t) * index
);
295 uprv_memcpy2(newChars
+ newZero
+ index
+ count
,
296 oldChars
+ oldZero
+ index
,
297 sizeof(char16_t) * (fLength
- index
));
298 uprv_memcpy2(newFields
+ newZero
, oldFields
+ oldZero
, sizeof(Field
) * index
);
299 uprv_memcpy2(newFields
+ newZero
+ index
+ count
,
300 oldFields
+ oldZero
+ index
,
301 sizeof(Field
) * (fLength
- index
));
305 uprv_free(oldFields
);
// Install the new buffers and record their capacity.
308 fChars
.heap
.ptr
= newChars
;
309 fChars
.heap
.capacity
= newCapacity
;
310 fFields
.heap
.ptr
= newFields
;
311 fFields
.heap
.capacity
= newCapacity
;
// In-place path: re-centre the string inside the existing buffers.
315 int32_t newZero
= oldCapacity
/ 2 - (fLength
+ count
) / 2;
317 // C++ note: memmove is required because src and dest may overlap.
318 // First copy the entire string to the location of the prefix, and then move the suffix
319 // to make room for the new chars that the caller wants to insert.
320 uprv_memmove2(oldChars
+ newZero
, oldChars
+ oldZero
, sizeof(char16_t) * fLength
);
321 uprv_memmove2(oldChars
+ newZero
+ index
+ count
,
322 oldChars
+ newZero
+ index
,
323 sizeof(char16_t) * (fLength
- index
));
324 uprv_memmove2(oldFields
+ newZero
, oldFields
+ oldZero
, sizeof(Field
) * fLength
);
325 uprv_memmove2(oldFields
+ newZero
+ index
+ count
,
326 oldFields
+ newZero
+ index
,
327 sizeof(Field
) * (fLength
- index
));
332 return fZero
+ index
;
// Removes `count` code units starting at string offset `index` by sliding the
// tail of the string (fLength - index - count elements) left over the removed
// range, in both the char and field arrays. memmove is used because source and
// destination overlap. Buffers are not shrunk (see TODO).
// NOTE(review): the update of fLength and the return value are not visible in
// this listing; no validation of index/count is visible either — confirm.
335 int32_t FormattedStringBuilder::remove(int32_t index
, int32_t count
) {
336 // TODO: Reset the heap here? (If the string after removal can fit on stack?)
// Convert the string offset into an index into the raw arrays.
337 int32_t position
= index
+ fZero
;
338 uprv_memmove2(getCharPtr() + position
,
339 getCharPtr() + position
+ count
,
340 sizeof(char16_t) * (fLength
- index
- count
));
341 uprv_memmove2(getFieldPtr() + position
,
342 getFieldPtr() + position
+ count
,
343 sizeof(Field
) * (fLength
- index
- count
));
// Returns a UnicodeString constructed from the fLength code units starting at
// getCharPtr() + fZero.
348 UnicodeString
FormattedStringBuilder::toUnicodeString() const {
349 return UnicodeString(getCharPtr() + fZero
, fLength
);
// Returns a UnicodeString built with the read-only aliasing constructor over
// the fLength code units starting at getCharPtr() + fZero. FALSE is passed as
// the first argument (the "is NUL-terminated" flag of that constructor).
// NOTE(review): because the result aliases this builder's buffer, it presumably
// must not be used after the builder is modified or destroyed — confirm
// against the UnicodeString documentation.
352 const UnicodeString
FormattedStringBuilder::toTempUnicodeString() const {
353 // Readonly-alias constructor:
354 return UnicodeString(FALSE
, getCharPtr() + fZero
, fLength
);
// Builds a debugging representation of the form
//   <FormattedStringBuilder [chars] [fields]>
// where the first bracket holds toUnicodeString() and the second is produced
// by visiting the field of every code unit in [0, fLength).
// A field equal to UNUM_FIELD_COUNT is handled separately before the switch;
// the switch has a case for each of the number-format fields listed below.
// NOTE(review): the declaration of `sb`, what is appended for each case (and
// for UNUM_FIELD_COUNT / any default case), and the return statement are not
// visible in this listing — confirm.
357 UnicodeString
FormattedStringBuilder::toDebugString() const {
359 sb
.append(u
"<FormattedStringBuilder [", -1);
360 sb
.append(toUnicodeString());
361 sb
.append(u
"] [", -1);
362 for (int i
= 0; i
< fLength
; i
++) {
363 if (fieldAt(i
) == UNUM_FIELD_COUNT
) {
367 switch (fieldAt(i
)) {
368 case UNUM_SIGN_FIELD
:
371 case UNUM_INTEGER_FIELD
:
374 case UNUM_FRACTION_FIELD
:
377 case UNUM_EXPONENT_FIELD
:
380 case UNUM_EXPONENT_SIGN_FIELD
:
383 case UNUM_EXPONENT_SYMBOL_FIELD
:
386 case UNUM_DECIMAL_SEPARATOR_FIELD
:
389 case UNUM_GROUPING_SEPARATOR_FIELD
:
392 case UNUM_PERCENT_FIELD
:
395 case UNUM_PERMILL_FIELD
:
398 case UNUM_CURRENCY_FIELD
:
408 sb
.append(u
"]>", -1);
// Returns a pointer to the first code unit of the string, i.e. the raw char
// buffer offset by fZero. The pointer refers to this builder's own storage.
412 const char16_t *FormattedStringBuilder::chars() const {
413 return getCharPtr() + fZero
;
// Compares this builder with `other` by content: first the lengths, then the
// code unit and field at every position in [0, fLength).
// NOTE(review): the return statements are not visible in this listing;
// presumably false on the first mismatch and true otherwise — confirm.
416 bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder
&other
) const {
// Different lengths can never be equal; checked before the element loop.
417 if (fLength
!= other
.fLength
) {
420 for (int32_t i
= 0; i
< fLength
; i
++) {
421 if (charAt(i
) != other
.charAt(i
) || fieldAt(i
) != other
.fieldAt(i
)) {
// Scans the fields of all code units in [0, fLength) looking for one equal to
// `field`.
// NOTE(review): the return statements are not visible in this listing;
// presumably true on the first match and false otherwise — confirm.
428 bool FormattedStringBuilder::containsField(Field field
) const {
429 for (int32_t i
= 0; i
< fLength
; i
++) {
430 if (field
== fieldAt(i
)) {
439 #endif /* #if !UCONFIG_NO_FORMATTING */