1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 #include "number_stringbuilder.h"
9 #include "static_unicode_sets.h"
10 #include "unicode/utf16.h"
11 #include "number_utils.h"
14 using namespace icu::number
;
15 using namespace icu::number::impl
;
19 // A version of uprv_memcpy that checks for length 0.
20 // By default, uprv_memcpy requires a length of at least 1.
// dest/src/len are handed to uprv_memcpy in the same order; callers in this
// file pass len as a byte count (sizeof(element) * element count).
21 inline void uprv_memcpy2(void* dest
, const void* src
, size_t len
) {
23 uprv_memcpy(dest
, src
, len
);
27 // A version of uprv_memmove that checks for length 0.
28 // By default, uprv_memmove requires a length of at least 1.
// dest/src/len are handed to uprv_memmove in the same order; callers in this
// file pass len as a byte count and use it where source and destination
// ranges may overlap.
29 inline void uprv_memmove2(void* dest
, const void* src
, size_t len
) {
31 uprv_memmove(dest
, src
, len
);
// Default constructor.
37 NumberStringBuilder::NumberStringBuilder() {
39 // Initializing the memory to non-zero helps catch some bugs that involve
40 // reading from an improperly terminated string.
// Walks every slot of the initial buffer, index 0 through getCapacity() - 1.
41 for (int32_t i
=0; i
<getCapacity(); i
++) {
// Destructor: passes the heap pointers of both the char storage (fChars) and
// the field storage (fFields) to uprv_free.
// NOTE(review): fChars/fFields expose a `.heap` member, which suggests a
// heap-or-inline union; confirm these frees are only reached in heap mode.
47 NumberStringBuilder::~NumberStringBuilder() {
49 uprv_free(fChars
.heap
.ptr
);
50 uprv_free(fFields
.heap
.ptr
);
// Copy constructor: initializes this builder from `other`.
54 NumberStringBuilder::NumberStringBuilder(const NumberStringBuilder
&other
) {
// Copy-assignment operator: replaces this builder's storage and length with a
// copy of `other`. No UErrorCode is available here, so an allocation failure
// is handled by resetting to a default-constructed builder.
58 NumberStringBuilder
&NumberStringBuilder::operator=(const NumberStringBuilder
&other
) {
59 // Check for self-assignment
64 // Continue with deallocation and copying
// Release the heap pointers currently recorded for the char and field storage.
66 uprv_free(fChars
.heap
.ptr
);
67 uprv_free(fFields
.heap
.ptr
);
// The copy is sized by the source's capacity, not by its length.
71 int32_t capacity
= other
.getCapacity();
// Capacities above DEFAULT_CAPACITY get freshly malloc'ed arrays.
72 if (capacity
> DEFAULT_CAPACITY
) {
74 // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
75 auto newChars
= static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity
));
76 auto newFields
= static_cast<Field
*>(uprv_malloc(sizeof(Field
) * capacity
));
// If either allocation failed, take the silent fallback below.
77 if (newChars
== nullptr || newFields
== nullptr) {
78 // UErrorCode is not available; fail silently.
// Fall back to a default-constructed builder.
81 *this = NumberStringBuilder(); // can't fail
// Record the new arrays and their capacity in the heap members.
86 fChars
.heap
.capacity
= capacity
;
87 fChars
.heap
.ptr
= newChars
;
88 fFields
.heap
.capacity
= capacity
;
89 fFields
.heap
.ptr
= newFields
;
// Copy `capacity` elements of chars and of fields from `other`
// (the third argument is a byte count).
92 uprv_memcpy2(getCharPtr(), other
.getCharPtr(), sizeof(char16_t) * capacity
);
93 uprv_memcpy2(getFieldPtr(), other
.getFieldPtr(), sizeof(Field
) * capacity
);
// Adopt the source's logical length.
96 fLength
= other
.fLength
;
// Const accessor for the builder's length in UTF-16 code units;
// presumably reports fLength (TODO confirm).
100 int32_t NumberStringBuilder::length() const {
// Returns the result of u_countChar32 over the fLength code units that start
// at getCharPtr() + fZero, i.e. the number of code points in the string.
104 int32_t NumberStringBuilder::codePointCount() const {
105 return u_countChar32(getCharPtr() + fZero
, fLength
);
// Reads the code point at the start of the string.
// NOTE(review): how an empty builder (fLength == 0) is handled is not
// visible here; confirm before relying on it.
108 UChar32
NumberStringBuilder::getFirstCodePoint() const {
// U16_GET(s, start, i, length, c): read the code point containing offset 0 of
// the string that begins at getCharPtr() + fZero, storing it in cp.
113 U16_GET(getCharPtr() + fZero
, 0, 0, fLength
, cp
);
// Reads the code point at the end of the string.
// NOTE(review): how an empty builder (fLength == 0) is handled is not
// visible here; confirm before relying on it.
117 UChar32
NumberStringBuilder::getLastCodePoint() const {
// Start one past the last code unit...
121 int32_t offset
= fLength
;
// ...and step back to the start of the final code point (handles a trailing
// surrogate pair).
122 U16_BACK_1(getCharPtr() + fZero
, 0, offset
);
// Read the code point that begins at `offset` into cp.
124 U16_GET(getCharPtr() + fZero
, 0, offset
, fLength
, cp
);
// Reads the code point that contains code-unit offset `index`, where `index`
// is relative to the start of the string (getCharPtr() + fZero).
128 UChar32
NumberStringBuilder::codePointAt(int32_t index
) const {
// U16_GET stores the decoded code point in cp.
130 U16_GET(getCharPtr() + fZero
, 0, index
, fLength
, cp
);
// Reads the code point that ends immediately before code-unit offset `index`
// (string-relative).
134 UChar32
NumberStringBuilder::codePointBefore(int32_t index
) const {
// Work on a copy because U16_BACK_1 modifies its offset argument.
135 int32_t offset
= index
;
// Move back one whole code point from `index`.
136 U16_BACK_1(getCharPtr() + fZero
, 0, offset
);
// Decode the code point at the new offset into cp.
138 U16_GET(getCharPtr() + fZero
, 0, offset
, fLength
, cp
);
// Resets the builder; declared to return a NumberStringBuilder reference.
142 NumberStringBuilder
&NumberStringBuilder::clear() {
143 // TODO: Reset the heap here?
// Re-center the start offset in the middle of the current capacity.
144 fZero
= getCapacity() / 2;
// Appends one code point tagged with `field` to the end of the string.
// Equivalent to insertCodePoint at index fLength; returns that call's result
// and reports errors through `status`.
149 int32_t NumberStringBuilder::appendCodePoint(UChar32 codePoint
, Field field
, UErrorCode
&status
) {
150 return insertCodePoint(fLength
, codePoint
, field
, status
);
// Inserts a single code point, tagged with `field`, at string offset `index`.
// U16_LENGTH gives the number of UTF-16 code units needed (1 or 2); that many
// slots are reserved through prepareForInsert before anything is written.
154 NumberStringBuilder::insertCodePoint(int32_t index
, UChar32 codePoint
, Field field
, UErrorCode
&status
) {
155 int32_t count
= U16_LENGTH(codePoint
);
// `position` is a raw index into the char/field arrays.
156 int32_t position
= prepareForInsert(index
, count
, status
);
// The reservation can fail; `status` is checked before writing.
157 if (U_FAILURE(status
)) {
// One-unit form: store the code point itself and its field.
161 getCharPtr()[position
] = (char16_t) codePoint
;
162 getFieldPtr()[position
] = field
;
// Two-unit form: store lead and trail surrogates, tagging both slots with
// the same field.
164 getCharPtr()[position
] = U16_LEAD(codePoint
);
165 getCharPtr()[position
+ 1] = U16_TRAIL(codePoint
);
166 getFieldPtr()[position
] = getFieldPtr()[position
+ 1] = field
;
// Appends `unistr`, tagging every inserted code unit with `field`.
// Equivalent to insert() at index fLength; returns that call's result and
// reports errors through `status`.
171 int32_t NumberStringBuilder::append(const UnicodeString
&unistr
, Field field
, UErrorCode
&status
) {
172 return insert(fLength
, unistr
, field
, status
);
// Inserts all of `unistr` at string offset `index`, tagging it with `field`.
// Dispatches on the string's length: empty, exactly one code unit, or longer.
175 int32_t NumberStringBuilder::insert(int32_t index
, const UnicodeString
&unistr
, Field field
,
176 UErrorCode
&status
) {
177 if (unistr
.length() == 0) {
178 // Nothing to insert.
180 } else if (unistr
.length() == 1) {
181 // Fast path: insert using insertCodePoint.
182 return insertCodePoint(index
, unistr
.charAt(0), field
, status
);
// General case: delegate to the range overload with the full range
// [0, unistr.length()).
184 return insert(index
, unistr
, 0, unistr
.length(), field
, status
);
// Inserts the code units unistr[start, end) at string offset `index`, tagging
// each one with `field`.
189 NumberStringBuilder::insert(int32_t index
, const UnicodeString
&unistr
, int32_t start
, int32_t end
,
190 Field field
, UErrorCode
&status
) {
// Number of code units to insert.
191 int32_t count
= end
- start
;
// Reserve the slots; `position` is a raw index into the char/field arrays.
192 int32_t position
= prepareForInsert(index
, count
, status
);
193 if (U_FAILURE(status
)) {
// Copy one code unit at a time and record its field alongside.
196 for (int32_t i
= 0; i
< count
; i
++) {
197 getCharPtr()[position
+ i
] = unistr
.charAt(start
+ i
);
198 getFieldPtr()[position
+ i
] = field
;
// Replaces this string's range [startThis, endThis) with
// unistr[startOther, endOther), tagging the new code units with `field`.
// The net size change (`count`) decides between growing via prepareForInsert
// and shrinking via remove().
204 NumberStringBuilder::splice(int32_t startThis
, int32_t endThis
, const UnicodeString
&unistr
,
205 int32_t startOther
, int32_t endOther
, Field field
, UErrorCode
& status
) {
// Length of the range being replaced.
206 int32_t thisLength
= endThis
- startThis
;
// Length of the replacement text.
207 int32_t otherLength
= endOther
- startOther
;
// Net growth (positive) or shrinkage (zero/negative) of the string.
208 int32_t count
= otherLength
- thisLength
;
211 // Overall, chars need to be added.
212 position
= prepareForInsert(startThis
, count
, status
);
214 // Overall, chars need to be removed or kept the same.
215 position
= remove(startThis
, -count
);
217 if (U_FAILURE(status
)) {
// Overwrite otherLength slots, starting at `position`, with the replacement
// text and field.
220 for (int32_t i
= 0; i
< otherLength
; i
++) {
221 getCharPtr()[position
+ i
] = unistr
.charAt(startOther
+ i
);
222 getFieldPtr()[position
+ i
] = field
;
// Appends the contents of another builder.
// Equivalent to insert() at index fLength; returns that call's result and
// reports errors through `status`.
227 int32_t NumberStringBuilder::append(const NumberStringBuilder
&other
, UErrorCode
&status
) {
228 return insert(fLength
, other
, status
);
// Inserts the chars and fields of `other` at string offset `index`.
// Inserting a builder into itself is rejected with U_ILLEGAL_ARGUMENT_ERROR.
232 NumberStringBuilder::insert(int32_t index
, const NumberStringBuilder
&other
, UErrorCode
&status
) {
// Self-insertion is not supported.
233 if (this == &other
) {
234 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Number of code units to copy from `other`.
237 int32_t count
= other
.fLength
;
239 // Nothing to insert.
// Reserve the slots; `position` is a raw index into the char/field arrays.
242 int32_t position
= prepareForInsert(index
, count
, status
);
243 if (U_FAILURE(status
)) {
// Copy each char together with its own field from `other`.
246 for (int32_t i
= 0; i
< count
; i
++) {
247 getCharPtr()[position
+ i
] = other
.charAt(i
);
248 getFieldPtr()[position
+ i
] = other
.fieldAt(i
);
// Writes a terminating 0 code unit after the current end of the string.
// Reports errors from the space reservation through `status`.
253 void NumberStringBuilder::writeTerminator(UErrorCode
& status
) {
// Reserve one slot at the end of the string.
254 int32_t position
= prepareForInsert(fLength
, 1, status
);
255 if (U_FAILURE(status
)) {
// Store the terminator and give its slot the UNUM_FIELD_COUNT tag.
258 getCharPtr()[position
] = 0;
259 getFieldPtr()[position
] = UNUM_FIELD_COUNT
;
// Reserves room for `count` new code units at string offset `index` and
// returns the raw array position where the caller should write them.
// Preconditions (asserted): 0 <= index <= fLength and count >= 0.
// Two cheap cases are tested first: inserting at the very front when
// fZero - count >= 0, and inserting at the very end when
// fZero + fLength + count < getCapacity(). Everything else is delegated to
// prepareForInsertHelper, which may set `status`.
263 int32_t NumberStringBuilder::prepareForInsert(int32_t index
, int32_t count
, UErrorCode
&status
) {
264 U_ASSERT(index
>= 0);
265 U_ASSERT(index
<= fLength
);
266 U_ASSERT(count
>= 0);
// Front case: there is free space before fZero.
267 if (index
== 0 && fZero
- count
>= 0) {
// End case: there is free space after the current end of the string.
272 } else if (index
== fLength
&& fZero
+ fLength
+ count
< getCapacity()) {
275 return fZero
+ fLength
- count
;
277 // Move chars around and/or allocate more space
278 return prepareForInsertHelper(index
, count
, status
);
// Slow path of prepareForInsert: opens a gap of `count` slots at string
// offset `index`, either by allocating larger arrays (when fLength + count
// exceeds the current capacity) or by shifting the existing contents inside
// the current arrays. In both cases the new start offset is chosen so that
// the resulting string is centered in the buffer.
// Returns fZero + index. On allocation failure, sets `status` to
// U_MEMORY_ALLOCATION_ERROR.
// NOTE(review): (fLength + count) * 2 is int32_t arithmetic and could
// overflow for very large strings; consider an explicit guard.
282 int32_t NumberStringBuilder::prepareForInsertHelper(int32_t index
, int32_t count
, UErrorCode
&status
) {
// Snapshot the current storage before anything is changed.
283 int32_t oldCapacity
= getCapacity();
284 int32_t oldZero
= fZero
;
285 char16_t *oldChars
= getCharPtr();
286 Field
*oldFields
= getFieldPtr();
// Grow when the string plus the requested gap no longer fits.
287 if (fLength
+ count
> oldCapacity
) {
// New capacity is twice the required size.
288 int32_t newCapacity
= (fLength
+ count
) * 2;
// Start offset that centers the grown string in the new buffer.
289 int32_t newZero
= newCapacity
/ 2 - (fLength
+ count
) / 2;
291 // C++ note: malloc appears in two places: here and in the assignment operator.
292 auto newChars
= static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity
));
293 auto newFields
= static_cast<Field
*>(uprv_malloc(sizeof(Field
) * newCapacity
));
// Either allocation failing is reported as a memory allocation error.
294 if (newChars
== nullptr || newFields
== nullptr) {
296 uprv_free(newFields
);
297 status
= U_MEMORY_ALLOCATION_ERROR
;
301 // First copy the prefix and then the suffix, leaving room for the new chars that the
302 // caller wants to insert.
303 // C++ note: memcpy is OK because the src and dest do not overlap.
304 uprv_memcpy2(newChars
+ newZero
, oldChars
+ oldZero
, sizeof(char16_t) * index
);
305 uprv_memcpy2(newChars
+ newZero
+ index
+ count
,
306 oldChars
+ oldZero
+ index
,
307 sizeof(char16_t) * (fLength
- index
));
// Same prefix/suffix copy for the parallel field array.
308 uprv_memcpy2(newFields
+ newZero
, oldFields
+ oldZero
, sizeof(Field
) * index
);
309 uprv_memcpy2(newFields
+ newZero
+ index
+ count
,
310 oldFields
+ oldZero
+ index
,
311 sizeof(Field
) * (fLength
- index
));
// Release the previous field array.
315 uprv_free(oldFields
);
// Install the new arrays and their capacity.
318 fChars
.heap
.ptr
= newChars
;
319 fChars
.heap
.capacity
= newCapacity
;
320 fFields
.heap
.ptr
= newFields
;
321 fFields
.heap
.capacity
= newCapacity
;
// In-place case: re-center the grown string within the existing capacity.
325 int32_t newZero
= oldCapacity
/ 2 - (fLength
+ count
) / 2;
327 // C++ note: memmove is required because src and dest may overlap.
328 // First copy the entire string to the location of the prefix, and then move the suffix
329 // to make room for the new chars that the caller wants to insert.
330 uprv_memmove2(oldChars
+ newZero
, oldChars
+ oldZero
, sizeof(char16_t) * fLength
);
331 uprv_memmove2(oldChars
+ newZero
+ index
+ count
,
332 oldChars
+ newZero
+ index
,
333 sizeof(char16_t) * (fLength
- index
));
// Same two-step move for the parallel field array.
334 uprv_memmove2(oldFields
+ newZero
, oldFields
+ oldZero
, sizeof(Field
) * fLength
);
335 uprv_memmove2(oldFields
+ newZero
+ index
+ count
,
336 oldFields
+ newZero
+ index
,
337 sizeof(Field
) * (fLength
- index
));
// Raw array position of the start of the gap.
342 return fZero
+ index
;
// Removes `count` code units starting at string offset `index` by sliding the
// tail of both the char and field arrays down over the removed range.
// splice() uses the returned value as a raw write position.
// NOTE(review): no bounds checks on index/count are visible; callers are
// presumably expected to pass a range inside the string.
345 int32_t NumberStringBuilder::remove(int32_t index
, int32_t count
) {
346 // TODO: Reset the heap here? (If the string after removal can fit on stack?)
// Raw array index of the first removed slot.
347 int32_t position
= index
+ fZero
;
// Move the (fLength - index - count) trailing chars down by `count` slots.
348 uprv_memmove2(getCharPtr() + position
,
349 getCharPtr() + position
+ count
,
350 sizeof(char16_t) * (fLength
- index
- count
));
// Same move for the parallel field array.
351 uprv_memmove2(getFieldPtr() + position
,
352 getFieldPtr() + position
+ count
,
353 sizeof(Field
) * (fLength
- index
- count
));
// Returns a UnicodeString constructed from the fLength code units that start
// at getCharPtr() + fZero.
358 UnicodeString
NumberStringBuilder::toUnicodeString() const {
359 return UnicodeString(getCharPtr() + fZero
, fLength
);
// Returns a UnicodeString that aliases this builder's buffer instead of
// copying it; the FALSE argument marks the text as not NUL-terminated.
// NOTE(review): as an alias, the result presumably must not outlive or
// straddle modifications of this builder; confirm with callers.
362 const UnicodeString
NumberStringBuilder::toTempUnicodeString() const {
363 // Readonly-alias constructor:
364 return UnicodeString(FALSE
, getCharPtr() + fZero
, fLength
);
// Builds a human-readable dump of the form
// "<NumberStringBuilder [" + chars + "] [" + per-position field info + "]>".
367 UnicodeString
NumberStringBuilder::toDebugString() const {
// Opening tag followed by the string contents.
369 sb
.append(u
"<NumberStringBuilder [", -1);
370 sb
.append(toUnicodeString());
371 sb
.append(u
"] [", -1);
// One pass over every position to describe its field.
372 for (int i
= 0; i
< fLength
; i
++) {
// UNUM_FIELD_COUNT is tested separately from the real field values.
373 if (fieldAt(i
) == UNUM_FIELD_COUNT
) {
// Each recognized number field has its own case.
377 switch (fieldAt(i
)) {
378 case UNUM_SIGN_FIELD
:
381 case UNUM_INTEGER_FIELD
:
384 case UNUM_FRACTION_FIELD
:
387 case UNUM_EXPONENT_FIELD
:
390 case UNUM_EXPONENT_SIGN_FIELD
:
393 case UNUM_EXPONENT_SYMBOL_FIELD
:
396 case UNUM_DECIMAL_SEPARATOR_FIELD
:
399 case UNUM_GROUPING_SEPARATOR_FIELD
:
402 case UNUM_PERCENT_FIELD
:
405 case UNUM_PERMILL_FIELD
:
408 case UNUM_CURRENCY_FIELD
:
// Closing tag.
418 sb
.append(u
"]>", -1);
// Returns a read-only pointer to the first code unit of the string, i.e. the
// backing array advanced by the start offset fZero.
422 const char16_t *NumberStringBuilder::chars() const {
423 return getCharPtr() + fZero
;
// Compares this builder with `other` by content: the lengths are compared
// first, then the char and the field at every position.
426 bool NumberStringBuilder::contentEquals(const NumberStringBuilder
&other
) const {
// Different lengths are checked before any per-position comparison.
427 if (fLength
!= other
.fLength
) {
430 for (int32_t i
= 0; i
< fLength
; i
++) {
// A position differs if either its char or its field differs.
431 if (charAt(i
) != other
.charAt(i
) || fieldAt(i
) != other
.fieldAt(i
)) {
// FieldPosition-based lookup: searches for the field requested by
// fp.getField(), continuing from fp's current begin/end indices, and writes
// the found range back into `fp`.
// Sets `status` to U_ILLEGAL_ARGUMENT_ERROR when the requested field is
// negative or >= UNUM_FIELD_COUNT. FieldPosition::DONT_CARE is checked before
// that validation.
438 bool NumberStringBuilder::nextFieldPosition(FieldPosition
& fp
, UErrorCode
& status
) const {
439 int32_t rawField
= fp
.getField();
441 if (rawField
== FieldPosition::DONT_CARE
) {
// Reject values outside the number-field range.
445 if (rawField
< 0 || rawField
>= UNUM_FIELD_COUNT
) {
446 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Translate the request into a ConstrainedFieldPosition restricted to this
// number field and seeded with fp's current range.
450 ConstrainedFieldPosition cfpos
;
451 cfpos
.constrainField(UFIELD_CATEGORY_NUMBER
, rawField
);
452 cfpos
.setState(UFIELD_CATEGORY_NUMBER
, rawField
, fp
.getBeginIndex(), fp
.getEndIndex());
// On a hit, copy the found range back into fp.
453 if (nextPosition(cfpos
, 0, status
)) {
454 fp
.setBeginIndex(cfpos
.getStart());
455 fp
.setEndIndex(cfpos
.getLimit());
459 // Special case: fraction should start after integer if fraction is not present
460 if (rawField
== UNUM_FRACTION_FIELD
&& fp
.getEndIndex() == 0) {
// Scan raw array positions while they hold integer, grouping-separator, or
// decimal-separator fields.
463 for (; i
< fZero
+ fLength
; i
++) {
464 if (isIntOrGroup(getFieldPtr()[i
]) || getFieldPtr()[i
] == UNUM_DECIMAL_SEPARATOR_FIELD
) {
// Report an empty range at the string-relative position where the scan ended.
470 fp
.setBeginIndex(i
- fZero
);
471 fp
.setEndIndex(i
- fZero
);
// Reports every field position in the string to `fpih`.
// Starts from a fresh, unconstrained ConstrainedFieldPosition and calls
// nextPosition (with numericField = 0) until it returns false, forwarding
// each result's field, start, and limit to fpih.addAttribute.
477 void NumberStringBuilder::getAllFieldPositions(FieldPositionIteratorHandler
& fpih
,
478 UErrorCode
& status
) const {
479 ConstrainedFieldPosition cfpos
;
480 while (nextPosition(cfpos
, 0, status
)) {
481 fpih
.addAttribute(cfpos
.getField(), cfpos
.getStart(), cfpos
.getLimit());
485 // Signal the end of the string using a field that doesn't exist and that is
486 // different from UNUM_FIELD_COUNT, which is used for "null number field".
// nextPosition substitutes this value for the position one past the last
// character of the string.
487 static constexpr Field kEndField
= 0xff;
// Core field iterator. Scans the field array starting at raw position
// fZero + cfpos.getLimit() and reports a found field through cfpos.setState().
//   cfpos        - in/out: its limit is where the scan resumes, and
//                  matchesField() decides which fields are of interest.
//   numericField - when non-zero, enables the NUMERIC coalescing special
//                  case, reported under the category/field that
//                  NumFieldUtils::expand derives from it.
//   (UErrorCode) - unnamed parameter; not used by this function.
// Presumably returns true when a position was written to cfpos (TODO confirm).
489 bool NumberStringBuilder::nextPosition(ConstrainedFieldPosition
& cfpos
, Field numericField
, UErrorCode
& /*status*/) const {
490 auto numericCAF
= NumFieldUtils::expand(numericField
);
// fieldStart/currField track the field run currently being scanned;
// currField == UNUM_FIELD_COUNT means "not inside a run".
491 int32_t fieldStart
= -1;
492 Field currField
= UNUM_FIELD_COUNT
;
// The loop runs one position past the end so that a run touching the end of
// the string is still closed (see kEndField).
493 for (int32_t i
= fZero
+ cfpos
.getLimit(); i
<= fZero
+ fLength
; i
++) {
494 Field _field
= (i
< fZero
+ fLength
) ? getFieldPtr()[i
] : kEndField
;
495 // Case 1: currently scanning a field.
496 if (currField
!= UNUM_FIELD_COUNT
) {
// A change of field value ends the current run.
497 if (currField
!= _field
) {
498 int32_t end
= i
- fZero
;
499 // Grouping separators can be whitespace; don't throw them out!
500 if (currField
!= UNUM_GROUPING_SEPARATOR_FIELD
) {
501 end
= trimBack(i
- fZero
);
503 if (end
<= fieldStart
) {
504 // Entire field position is ignorable; skip.
506 currField
= UNUM_FIELD_COUNT
;
507 i
--; // look at this index again
// Trim the front of the run as well (again, except for grouping separators).
510 int32_t start
= fieldStart
;
511 if (currField
!= UNUM_GROUPING_SEPARATOR_FIELD
) {
512 start
= trimFront(start
);
// Report the trimmed run under the category/field derived from currField.
514 auto caf
= NumFieldUtils::expand(currField
);
515 cfpos
.setState(caf
.category
, caf
.field
, start
, end
);
520 // Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER.
521 if (cfpos
.matchesField(UFIELD_CATEGORY_NUMBER
, UNUM_INTEGER_FIELD
)
523 // don't return the same field twice in a row:
524 && i
- fZero
> cfpos
.getLimit()
525 && isIntOrGroup(getFieldPtr()[i
- 1])
526 && !isIntOrGroup(_field
)) {
// Walk backwards over the contiguous integer/grouping run to find its start.
528 for (; j
>= fZero
&& isIntOrGroup(getFieldPtr()[j
]); j
--) {}
529 cfpos
.setState(UFIELD_CATEGORY_NUMBER
, UNUM_INTEGER_FIELD
, j
- fZero
+ 1, i
- fZero
);
532 // Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC.
533 if (numericField
!= 0
534 && cfpos
.matchesField(numericCAF
.category
, numericCAF
.field
)
536 // don't return the same field twice in a row:
537 && (i
- fZero
> cfpos
.getLimit()
538 || cfpos
.getCategory() != numericCAF
.category
539 || cfpos
.getField() != numericCAF
.field
)
540 && isNumericField(getFieldPtr()[i
- 1])
541 && !isNumericField(_field
)) {
// Walk backwards over the contiguous numeric run to find its start.
543 for (; j
>= fZero
&& isNumericField(getFieldPtr()[j
]); j
--) {}
544 cfpos
.setState(numericCAF
.category
, numericCAF
.field
, j
- fZero
+ 1, i
- fZero
);
547 // Special case: skip over INTEGER; will be coalesced later.
548 if (_field
== UNUM_INTEGER_FIELD
) {
549 _field
= UNUM_FIELD_COUNT
;
551 // Case 2: no field starting at this position.
552 if (_field
== UNUM_FIELD_COUNT
|| _field
== kEndField
) {
555 // Case 3: check for field starting at this position
556 auto caf
= NumFieldUtils::expand(_field
);
557 if (cfpos
.matchesField(caf
.category
, caf
.field
)) {
// Remember the string-relative start of the new run.
558 fieldStart
= i
- fZero
;
// After the loop, no field run may still be open.
563 U_ASSERT(currField
== UNUM_FIELD_COUNT
);
// Linear search: checks each position of the string for one whose field tag
// equals `field`.
567 bool NumberStringBuilder::containsField(Field field
) const {
568 for (int32_t i
= 0; i
< fLength
; i
++) {
569 if (field
== fieldAt(i
)) {
// Returns true when `field` is UNUM_INTEGER_FIELD or
// UNUM_GROUPING_SEPARATOR_FIELD, the two tags that make up the integer part.
576 bool NumberStringBuilder::isIntOrGroup(Field field
) {
577 return field
== UNUM_INTEGER_FIELD
578 || field
== UNUM_GROUPING_SEPARATOR_FIELD
;
// Thin wrapper: returns NumFieldUtils::isNumericField(field) unchanged.
581 bool NumberStringBuilder::isNumericField(Field field
) {
582 return NumFieldUtils::isNumericField(field
);
// Returns the result of spanBack() on the DEFAULT_IGNORABLES set, applied to
// the string starting at getCharPtr() + fZero with USET_SPAN_CONTAINED.
// nextPosition uses the result as the trimmed end index of a field run.
585 int32_t NumberStringBuilder::trimBack(int32_t limit
) const {
586 return unisets::get(unisets::DEFAULT_IGNORABLES
)->spanBack(
587 getCharPtr() + fZero
,
589 USET_SPAN_CONTAINED
);
// Returns `start` plus the result of span() on the DEFAULT_IGNORABLES set,
// applied with USET_SPAN_CONTAINED to the text beginning at string offset
// `start`. nextPosition uses the result as the trimmed start index of a
// field run.
592 int32_t NumberStringBuilder::trimFront(int32_t start
) const {
593 return start
+ unisets::get(unisets::DEFAULT_IGNORABLES
)->span(
594 getCharPtr() + fZero
+ start
,
596 USET_SPAN_CONTAINED
);
599 #endif /* #if !UCONFIG_NO_FORMATTING */