1 // © 2017 and later: Unicode, Inc. and others. 
   2 // License & terms of use: http://www.unicode.org/copyright.html 
   4 #include "unicode/utypes.h" 
   6 #if !UCONFIG_NO_FORMATTING 
   8 #include "number_stringbuilder.h" 
   9 #include "static_unicode_sets.h" 
  10 #include "unicode/utf16.h" 
  11 #include "number_utils.h" 
  14 using namespace icu::number
; 
  15 using namespace icu::number::impl
; 
  19 // A version of uprv_memcpy that checks for length 0. 
  20 // By default, uprv_memcpy requires a length of at least 1. 
  21 inline void uprv_memcpy2(void* dest
, const void* src
, size_t len
) { 
  23         uprv_memcpy(dest
, src
, len
); 
  27 // A version of uprv_memmove that checks for length 0. 
  28 // By default, uprv_memmove requires a length of at least 1. 
  29 inline void uprv_memmove2(void* dest
, const void* src
, size_t len
) { 
  31         uprv_memmove(dest
, src
, len
); 
  // Default constructor.
  // The visible code is a loop over every index in [0, getCapacity()); the
  // comment below states its purpose is to pre-fill the storage with non-zero
  // values so that reads from an improperly terminated string are easier to catch.
  // NOTE(review): the loop body is not visible in this listing.
  37 NumberStringBuilder::NumberStringBuilder() { 
  39     // Initializing the memory to non-zero helps catch some bugs that involve 
  40     // reading from an improperly terminated string. 
  41     for (int32_t i
=0; i
<getCapacity(); i
++) { 
  // Destructor: passes fChars.heap.ptr and fFields.heap.ptr to uprv_free.
  // NOTE(review): no guard around the two frees is visible in this listing;
  // confirm they only run when the heap representation is in use.
  47 NumberStringBuilder::~NumberStringBuilder() { 
  49         uprv_free(fChars
.heap
.ptr
); 
  50         uprv_free(fFields
.heap
.ptr
); 
  // Copy constructor; `other` is the builder to duplicate.
  // NOTE(review): the body is not visible in this listing.
  54 NumberStringBuilder::NumberStringBuilder(const NumberStringBuilder 
&other
) { 
  // Copy assignment from `other`.
  // Visible steps, in order:
  //   1. free this object's heap char and field buffers;
  //   2. read other's capacity; when it exceeds DEFAULT_CAPACITY, allocate
  //      char16_t and Field arrays of that capacity with uprv_malloc and
  //      install them (pointer and capacity) as the heap buffers;
  //   3. copy `capacity` elements of chars and of fields from `other`;
  //   4. copy fLength.
  // There is no UErrorCode to report an allocation failure through, so on
  // failure the object is reset to a default-constructed builder.
  // NOTE(review): the self-assignment test, the condition guarding the frees,
  // the cleanup on allocation failure, and the return statements are not
  // visible in this listing.
  58 NumberStringBuilder 
&NumberStringBuilder::operator=(const NumberStringBuilder 
&other
) { 
  59     // Check for self-assignment 
  64     // Continue with deallocation and copying 
  66         uprv_free(fChars
.heap
.ptr
); 
  67         uprv_free(fFields
.heap
.ptr
); 
  71     int32_t capacity 
= other
.getCapacity(); 
  72     if (capacity 
> DEFAULT_CAPACITY
) { 
  74         // C++ note: malloc appears in two places: here and in prepareForInsertHelper. 
  75         auto newChars 
= static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity
)); 
  76         auto newFields 
= static_cast<Field 
*>(uprv_malloc(sizeof(Field
) * capacity
)); 
  77         if (newChars 
== nullptr || newFields 
== nullptr) { 
  78             // UErrorCode is not available; fail silently. 
  81             *this = NumberStringBuilder();  // can't fail 
  86         fChars
.heap
.capacity 
= capacity
; 
  87         fChars
.heap
.ptr 
= newChars
; 
  88         fFields
.heap
.capacity 
= capacity
; 
  89         fFields
.heap
.ptr 
= newFields
; 
  // The whole capacity is copied (not just fLength elements).
  92     uprv_memcpy2(getCharPtr(), other
.getCharPtr(), sizeof(char16_t) * capacity
); 
  93     uprv_memcpy2(getFieldPtr(), other
.getFieldPtr(), sizeof(Field
) * capacity
); 
  96     fLength 
= other
.fLength
; 
 // Accessor for the builder's length.
 // NOTE(review): the body is not visible in this listing.
 100 int32_t NumberStringBuilder::length() const { 
 104 int32_t NumberStringBuilder::codePointCount() const { 
 105     return u_countChar32(getCharPtr() + fZero
, fLength
); 
 // Decodes the code point at code-unit offset 0 of the string (which starts at
 // getCharPtr() + fZero and is fLength units long) into cp via U16_GET.
 // NOTE(review): the declaration of cp, any empty-string handling, and the
 // return statement are not visible in this listing.
 108 UChar32 
NumberStringBuilder::getFirstCodePoint() const { 
 113     U16_GET(getCharPtr() + fZero
, 0, 0, fLength
, cp
); 
 // Decodes the last code point of the string: offset starts at fLength, is
 // moved back by one code point with U16_BACK_1, and U16_GET then decodes
 // at that offset into cp.
 // NOTE(review): the declaration of cp, any empty-string handling, and the
 // return statement are not visible in this listing.
 117 UChar32 
NumberStringBuilder::getLastCodePoint() const { 
 121     int32_t offset 
= fLength
; 
 122     U16_BACK_1(getCharPtr() + fZero
, 0, offset
); 
 124     U16_GET(getCharPtr() + fZero
, 0, offset
, fLength
, cp
); 
 // Decodes the code point at code-unit offset `index` (relative to fZero)
 // into cp via U16_GET, bounded by [0, fLength).
 // NOTE(review): the declaration of cp and the return statement are not
 // visible in this listing.
 128 UChar32 
NumberStringBuilder::codePointAt(int32_t index
) const { 
 130     U16_GET(getCharPtr() + fZero
, 0, index
, fLength
, cp
); 
 // Decodes the code point that precedes code-unit offset `index`: a copy of
 // index is moved back one code point with U16_BACK_1 and U16_GET decodes at
 // the resulting offset into cp.
 // NOTE(review): the declaration of cp and the return statement are not
 // visible in this listing.
 134 UChar32 
NumberStringBuilder::codePointBefore(int32_t index
) const { 
 135     int32_t offset 
= index
; 
 136     U16_BACK_1(getCharPtr() + fZero
, 0, offset
); 
 138     U16_GET(getCharPtr() + fZero
, 0, offset
, fLength
, cp
); 
 // Resets the builder; the visible statement moves fZero to the middle of the
 // current capacity. The heap buffers are not released here (see the TODO).
 // NOTE(review): the remaining statements, including the return, are not
 // visible in this listing.
 142 NumberStringBuilder 
&NumberStringBuilder::clear() { 
 143     // TODO: Reset the heap here? 
 144     fZero 
= getCapacity() / 2; 
 149 int32_t NumberStringBuilder::appendCodePoint(UChar32 codePoint
, Field field
, UErrorCode 
&status
) { 
 150     return insertCodePoint(fLength
, codePoint
, field
, status
); 
 // Inserts one code point tagged with `field` at `index`.
 // count is the UTF-16 length of codePoint (U16_LENGTH); prepareForInsert
 // yields the buffer position to write at. Two write shapes are visible:
 // a single char16_t plus field at `position`, or a lead/trail surrogate pair
 // at position and position + 1 with `field` stored for both units.
 // NOTE(review): the return type line, the branch choosing between the two
 // write shapes, and the return statements are not visible in this listing.
 154 NumberStringBuilder::insertCodePoint(int32_t index
, UChar32 codePoint
, Field field
, UErrorCode 
&status
) { 
 155     int32_t count 
= U16_LENGTH(codePoint
); 
 156     int32_t position 
= prepareForInsert(index
, count
, status
); 
 157     if (U_FAILURE(status
)) { 
 161         getCharPtr()[position
] = (char16_t) codePoint
; 
 162         getFieldPtr()[position
] = field
; 
 164         getCharPtr()[position
] = U16_LEAD(codePoint
); 
 165         getCharPtr()[position 
+ 1] = U16_TRAIL(codePoint
); 
 166         getFieldPtr()[position
] = getFieldPtr()[position 
+ 1] = field
; 
 171 int32_t NumberStringBuilder::append(const UnicodeString 
&unistr
, Field field
, UErrorCode 
&status
) { 
 172     return insert(fLength
, unistr
, field
, status
); 
 // Inserts the whole of unistr, tagged with `field`, at `index`.
 // Dispatches on unistr.length(): 0 means nothing to insert, 1 takes the
 // insertCodePoint fast path with unistr.charAt(0), and the visible remaining
 // call forwards the full range [0, unistr.length()) to the range overload.
 // NOTE(review): the return for the empty case and the final else/closing
 // braces are not visible in this listing.
 175 int32_t NumberStringBuilder::insert(int32_t index
, const UnicodeString 
&unistr
, Field field
, 
 176                                     UErrorCode 
&status
) { 
 177     if (unistr
.length() == 0) { 
 178         // Nothing to insert. 
 180     } else if (unistr
.length() == 1) { 
 181         // Fast path: insert using insertCodePoint. 
 182         return insertCodePoint(index
, unistr
.charAt(0), field
, status
); 
 184         return insert(index
, unistr
, 0, unistr
.length(), field
, status
); 
 // Inserts code units [start, end) of unistr, all tagged with `field`, at `index`.
 // count = end - start units are reserved with prepareForInsert, then each
 // unit and its field are written starting at the returned position.
 // NOTE(review): the return type line and the return statements are not
 // visible in this listing.
 189 NumberStringBuilder::insert(int32_t index
, const UnicodeString 
&unistr
, int32_t start
, int32_t end
, 
 190                             Field field
, UErrorCode 
&status
) { 
 191     int32_t count 
= end 
- start
; 
 192     int32_t position 
= prepareForInsert(index
, count
, status
); 
 193     if (U_FAILURE(status
)) { 
 196     for (int32_t i 
= 0; i 
< count
; i
++) { 
 197         getCharPtr()[position 
+ i
] = unistr
.charAt(start 
+ i
); 
 198         getFieldPtr()[position 
+ i
] = field
; 
 // Replaces [startThis, endThis) of this builder with [startOther, endOther)
 // of unistr, tagging every written unit with `field`.
 // count is the net change in length (otherLength - thisLength). Per the
 // comments below, growth goes through prepareForInsert(startThis, count) and
 // shrinkage or no change goes through remove(startThis, -count); afterwards
 // otherLength units are written starting at `position`.
 // NOTE(review): the return type line, the declaration of position, the
 // condition selecting between the two paths, and the return statements are
 // not visible in this listing.
 204 NumberStringBuilder::splice(int32_t startThis
, int32_t endThis
,  const UnicodeString 
&unistr
, 
 205                             int32_t startOther
, int32_t endOther
, Field field
, UErrorCode
& status
) { 
 206     int32_t thisLength 
= endThis 
- startThis
; 
 207     int32_t otherLength 
= endOther 
- startOther
; 
 208     int32_t count 
= otherLength 
- thisLength
; 
 211         // Overall, chars need to be added. 
 212         position 
= prepareForInsert(startThis
, count
, status
); 
 214         // Overall, chars need to be removed or kept the same. 
 215         position 
= remove(startThis
, -count
); 
 217     if (U_FAILURE(status
)) { 
 220     for (int32_t i 
= 0; i 
< otherLength
; i
++) { 
 221         getCharPtr()[position 
+ i
] = unistr
.charAt(startOther 
+ i
); 
 222         getFieldPtr()[position 
+ i
] = field
; 
 227 int32_t NumberStringBuilder::append(const NumberStringBuilder 
&other
, UErrorCode 
&status
) { 
 228     return insert(fLength
, other
, status
); 
 // Inserts the contents (chars and fields) of `other` at `index`.
 // Inserting a builder into itself is rejected with U_ILLEGAL_ARGUMENT_ERROR.
 // Otherwise other.fLength units are reserved with prepareForInsert and each
 // char/field pair is copied via other.charAt(i) / other.fieldAt(i).
 // NOTE(review): the return type line, the empty-input test, and the return
 // statements are not visible in this listing.
 232 NumberStringBuilder::insert(int32_t index
, const NumberStringBuilder 
&other
, UErrorCode 
&status
) { 
 233     if (this == &other
) { 
 234         status 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 237     int32_t count 
= other
.fLength
; 
 239         // Nothing to insert. 
 242     int32_t position 
= prepareForInsert(index
, count
, status
); 
 243     if (U_FAILURE(status
)) { 
 246     for (int32_t i 
= 0; i 
< count
; i
++) { 
 247         getCharPtr()[position 
+ i
] = other
.charAt(i
); 
 248         getFieldPtr()[position 
+ i
] = other
.fieldAt(i
); 
 // Writes a terminator after the current contents: one slot is reserved at
 // index fLength with prepareForInsert, then a 0 char and the
 // UNUM_FIELD_COUNT field value are stored at the returned position.
 // NOTE(review): the early return on failure and any statements after the
 // two stores are not visible in this listing; confirm whether fLength is
 // meant to include the terminator.
 253 void NumberStringBuilder::writeTerminator(UErrorCode
& status
) { 
 254     int32_t position 
= prepareForInsert(fLength
, 1, status
); 
 255     if (U_FAILURE(status
)) { 
 258     getCharPtr()[position
] = 0; 
 259     getFieldPtr()[position
] = UNUM_FIELD_COUNT
; 
 // Reserves room for `count` code units at `index` and returns the buffer
 // position at which the caller should write them.
 // Asserts 0 <= index <= fLength and count >= 0.
 // Two fast paths are visible: inserting at index 0 when fZero - count >= 0,
 // and inserting at the end when fZero + fLength + count < getCapacity().
 // Every other case is forwarded to prepareForInsertHelper.
 // NOTE(review): the updates to fZero / fLength inside the fast paths and the
 // first fast path's return are not visible in this listing.
 263 int32_t NumberStringBuilder::prepareForInsert(int32_t index
, int32_t count
, UErrorCode 
&status
) { 
 264     U_ASSERT(index 
>= 0); 
 265     U_ASSERT(index 
<= fLength
); 
 266     U_ASSERT(count 
>= 0); 
 267     if (index 
== 0 && fZero 
- count 
>= 0) { 
 272     } else if (index 
== fLength 
&& fZero 
+ fLength 
+ count 
< getCapacity()) { 
 275         return fZero 
+ fLength 
- count
; 
 277         // Move chars around and/or allocate more space 
 278         return prepareForInsertHelper(index
, count
, status
); 
 // Slow path of prepareForInsert: opens a gap of `count` units at `index`,
 // either by reallocating or by shifting the existing contents in place.
 // Returns fZero + index.
 //
 // Growth branch (fLength + count > current capacity): allocates char and
 // field arrays of twice the required size, copies the prefix [0, index) and
 // the suffix [index, fLength) around the gap, and installs the new arrays as
 // the heap buffers. Allocation failure sets U_MEMORY_ALLOCATION_ERROR.
 //
 // In-place branch: computes a new zero offset from the old capacity, moves
 // the whole string there, then moves the suffix up by `count`.
 //
 // NOTE(review): (fLength + count) * 2 is int32_t arithmetic and can overflow
 // for very long strings; consider rejecting fLength + count > INT32_MAX / 2
 // before computing newCapacity.
 // NOTE(review): the updates to fZero / fLength, the freeing of the old char
 // array, the early return on allocation failure, and the branch separators
 // are not visible in this listing.
 282 int32_t NumberStringBuilder::prepareForInsertHelper(int32_t index
, int32_t count
, UErrorCode 
&status
) { 
 283     int32_t oldCapacity 
= getCapacity(); 
 284     int32_t oldZero 
= fZero
; 
 285     char16_t *oldChars 
= getCharPtr(); 
 286     Field 
*oldFields 
= getFieldPtr(); 
 287     if (fLength 
+ count 
> oldCapacity
) { 
 288         int32_t newCapacity 
= (fLength 
+ count
) * 2; 
 289         int32_t newZero 
= newCapacity 
/ 2 - (fLength 
+ count
) / 2; 
 291         // C++ note: malloc appears in two places: here and in the assignment operator. 
 292         auto newChars 
= static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity
)); 
 293         auto newFields 
= static_cast<Field 
*>(uprv_malloc(sizeof(Field
) * newCapacity
)); 
 294         if (newChars 
== nullptr || newFields 
== nullptr) { 
 296             uprv_free(newFields
); 
 297             status 
= U_MEMORY_ALLOCATION_ERROR
; 
 301         // First copy the prefix and then the suffix, leaving room for the new chars that the 
 302         // caller wants to insert. 
 303         // C++ note: memcpy is OK because the src and dest do not overlap. 
 304         uprv_memcpy2(newChars 
+ newZero
, oldChars 
+ oldZero
, sizeof(char16_t) * index
); 
 305         uprv_memcpy2(newChars 
+ newZero 
+ index 
+ count
, 
 306                 oldChars 
+ oldZero 
+ index
, 
 307                 sizeof(char16_t) * (fLength 
- index
)); 
 308         uprv_memcpy2(newFields 
+ newZero
, oldFields 
+ oldZero
, sizeof(Field
) * index
); 
 309         uprv_memcpy2(newFields 
+ newZero 
+ index 
+ count
, 
 310                 oldFields 
+ oldZero 
+ index
, 
 311                 sizeof(Field
) * (fLength 
- index
)); 
 315             uprv_free(oldFields
); 
 318         fChars
.heap
.ptr 
= newChars
; 
 319         fChars
.heap
.capacity 
= newCapacity
; 
 320         fFields
.heap
.ptr 
= newFields
; 
 321         fFields
.heap
.capacity 
= newCapacity
; 
 325         int32_t newZero 
= oldCapacity 
/ 2 - (fLength 
+ count
) / 2; 
 327         // C++ note: memmove is required because src and dest may overlap. 
 328         // First copy the entire string to the location of the prefix, and then move the suffix 
 329         // to make room for the new chars that the caller wants to insert. 
 330         uprv_memmove2(oldChars 
+ newZero
, oldChars 
+ oldZero
, sizeof(char16_t) * fLength
); 
 331         uprv_memmove2(oldChars 
+ newZero 
+ index 
+ count
, 
 332                 oldChars 
+ newZero 
+ index
, 
 333                 sizeof(char16_t) * (fLength 
- index
)); 
 334         uprv_memmove2(oldFields 
+ newZero
, oldFields 
+ oldZero
, sizeof(Field
) * fLength
); 
 335         uprv_memmove2(oldFields 
+ newZero 
+ index 
+ count
, 
 336                 oldFields 
+ newZero 
+ index
, 
 337                 sizeof(Field
) * (fLength 
- index
)); 
 342     return fZero 
+ index
; 
 // Removes `count` code units starting at `index` (relative to fZero) by
 // moving the tail of fLength - index - count elements down over them, in
 // both the char array and the field array. uprv_memmove2 is used, so an
 // empty tail is a no-op.
 // NOTE(review): the length update and the return statement are not visible
 // in this listing.
 345 int32_t NumberStringBuilder::remove(int32_t index
, int32_t count
) { 
 346     // TODO: Reset the heap here?  (If the string after removal can fit on stack?) 
 347     int32_t position 
= index 
+ fZero
; 
 348     uprv_memmove2(getCharPtr() + position
, 
 349             getCharPtr() + position 
+ count
, 
 350             sizeof(char16_t) * (fLength 
- index 
- count
)); 
 351     uprv_memmove2(getFieldPtr() + position
, 
 352             getFieldPtr() + position 
+ count
, 
 353             sizeof(Field
) * (fLength 
- index 
- count
)); 
 358 UnicodeString 
NumberStringBuilder::toUnicodeString() const { 
 359     return UnicodeString(getCharPtr() + fZero
, fLength
); 
 362 const UnicodeString 
NumberStringBuilder::toTempUnicodeString() const { 
 363     // Readonly-alias constructor: 
 364     return UnicodeString(FALSE
, getCharPtr() + fZero
, fLength
); 
 // Builds a debug representation of the builder in sb:
 // "<NumberStringBuilder [" + the string contents + "] [" + per-position
 // field output + "]>". The loop visits each index below fLength and
 // distinguishes UNUM_FIELD_COUNT from the number-field cases listed in the
 // switch.
 // NOTE(review): the declaration of sb, what is appended for each field
 // case, and the return statement are not visible in this listing.
 367 UnicodeString 
NumberStringBuilder::toDebugString() const { 
 369     sb
.append(u
"<NumberStringBuilder [", -1); 
 370     sb
.append(toUnicodeString()); 
 371     sb
.append(u
"] [", -1); 
 372     for (int i 
= 0; i 
< fLength
; i
++) { 
 373         if (fieldAt(i
) == UNUM_FIELD_COUNT
) { 
 377             switch (fieldAt(i
)) { 
 378                 case UNUM_SIGN_FIELD
: 
 381                 case UNUM_INTEGER_FIELD
: 
 384                 case UNUM_FRACTION_FIELD
: 
 387                 case UNUM_EXPONENT_FIELD
: 
 390                 case UNUM_EXPONENT_SIGN_FIELD
: 
 393                 case UNUM_EXPONENT_SYMBOL_FIELD
: 
 396                 case UNUM_DECIMAL_SEPARATOR_FIELD
: 
 399                 case UNUM_GROUPING_SEPARATOR_FIELD
: 
 402                 case UNUM_PERCENT_FIELD
: 
 405                 case UNUM_PERMILL_FIELD
: 
 408                 case UNUM_CURRENCY_FIELD
: 
 418     sb
.append(u
"]>", -1); 
 422 const char16_t *NumberStringBuilder::chars() const { 
 423     return getCharPtr() + fZero
; 
 // Compares this builder with `other`: first the lengths, then charAt(i) and
 // fieldAt(i) at every index below fLength.
 // NOTE(review): the return statements are not visible in this listing.
 426 bool NumberStringBuilder::contentEquals(const NumberStringBuilder 
&other
) const { 
 427     if (fLength 
!= other
.fLength
) { 
 430     for (int32_t i 
= 0; i 
< fLength
; i
++) { 
 431         if (charAt(i
) != other
.charAt(i
) || fieldAt(i
) != other
.fieldAt(i
)) { 
 // FieldPosition-based lookup of the next occurrence of fp's field.
 // FieldPosition::DONT_CARE is tested first; raw field values outside
 // [0, UNUM_FIELD_COUNT) set U_ILLEGAL_ARGUMENT_ERROR. Otherwise the search is
 // delegated to nextPosition with a ConstrainedFieldPosition constrained to
 // UFIELD_CATEGORY_NUMBER / rawField and seeded with fp's begin/end index.
 // On a match, fp's begin and end index are overwritten from cfpos.
 // Special case: for UNUM_FRACTION_FIELD with an end index of 0, the field
 // entries are scanned and fp is set to an empty range at i - fZero.
 // NOTE(review): the return statements, the initialisation of i, and the
 // actions inside the scanning loop are not visible in this listing.
 438 bool NumberStringBuilder::nextFieldPosition(FieldPosition
& fp
, UErrorCode
& status
) const { 
 439     int32_t rawField 
= fp
.getField(); 
 441     if (rawField 
== FieldPosition::DONT_CARE
) { 
 445     if (rawField 
< 0 || rawField 
>= UNUM_FIELD_COUNT
) { 
 446         status 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 450     ConstrainedFieldPosition cfpos
; 
 451     cfpos
.constrainField(UFIELD_CATEGORY_NUMBER
, rawField
); 
 452     cfpos
.setState(UFIELD_CATEGORY_NUMBER
, rawField
, fp
.getBeginIndex(), fp
.getEndIndex()); 
 453     if (nextPosition(cfpos
, 0, status
)) { 
 454         fp
.setBeginIndex(cfpos
.getStart()); 
 455         fp
.setEndIndex(cfpos
.getLimit()); 
 459     // Special case: fraction should start after integer if fraction is not present 
 460     if (rawField 
== UNUM_FRACTION_FIELD 
&& fp
.getEndIndex() == 0) { 
 463         for (; i 
< fZero 
+ fLength
; i
++) { 
 464             if (isIntOrGroup(getFieldPtr()[i
]) || getFieldPtr()[i
] == UNUM_DECIMAL_SEPARATOR_FIELD
) { 
 470         fp
.setBeginIndex(i 
- fZero
); 
 471         fp
.setEndIndex(i 
- fZero
); 
 477 void NumberStringBuilder::getAllFieldPositions(FieldPositionIteratorHandler
& fpih
, 
 478                                                UErrorCode
& status
) const { 
 479     ConstrainedFieldPosition cfpos
; 
 480     while (nextPosition(cfpos
, 0, status
)) { 
 481         fpih
.addAttribute(cfpos
.getField(), cfpos
.getStart(), cfpos
.getLimit()); 
 485 // Signal the end of the string using a field that doesn't exist and that is 
 486 // different from UNUM_FIELD_COUNT, which is used for "null number field". 
 // nextPosition substitutes this value for the field at the one-past-the-end
 // index of its scan.
 487 static constexpr Field kEndField 
= 0xff; 
 // Advances cfpos to the next field position in the string.
 // The scan starts at fZero + cfpos.getLimit() and runs through one index
 // past the end, where kEndField stands in for the field value.
 // Visible cases:
 //  - While a field is being tracked (currField != UNUM_FIELD_COUNT) and the
 //    field value changes, the span is trimmed with trimBack / trimFront
 //    (grouping separators are exempt), empty spans are skipped, and the
 //    result is reported through cfpos.setState.
 //  - At the end of a run of integer/grouping fields, the run is reported as
 //    one UNUM_INTEGER_FIELD span.
 //  - When numericField != 0, the end of a run of numeric fields is reported
 //    as a span of numericField's category/field.
 //  - Otherwise, a field matching cfpos's constraint starts a new span.
 // The status parameter is unnamed and unused.
 // NOTE(review): the return statements, the declarations of j, the `continue`
 // paths, and the update of currField when a span starts are not visible in
 // this listing.
 489 bool NumberStringBuilder::nextPosition(ConstrainedFieldPosition
& cfpos
, Field numericField
, UErrorCode
& /*status*/) const { 
 490     auto numericCAF 
= NumFieldUtils::expand(numericField
); 
 491     int32_t fieldStart 
= -1; 
 492     Field currField 
= UNUM_FIELD_COUNT
; 
 493     for (int32_t i 
= fZero 
+ cfpos
.getLimit(); i 
<= fZero 
+ fLength
; i
++) { 
 494         Field _field 
= (i 
< fZero 
+ fLength
) ? getFieldPtr()[i
] : kEndField
; 
 495         // Case 1: currently scanning a field. 
 496         if (currField 
!= UNUM_FIELD_COUNT
) { 
 497             if (currField 
!= _field
) { 
 498                 int32_t end 
= i 
- fZero
; 
 499                 // Grouping separators can be whitespace; don't throw them out! 
 500                 if (currField 
!= UNUM_GROUPING_SEPARATOR_FIELD
) { 
 501                     end 
= trimBack(i 
- fZero
); 
 503                 if (end 
<= fieldStart
) { 
 504                     // Entire field position is ignorable; skip. 
 506                     currField 
= UNUM_FIELD_COUNT
; 
 507                     i
--;  // look at this index again 
 510                 int32_t start 
= fieldStart
; 
 511                 if (currField 
!= UNUM_GROUPING_SEPARATOR_FIELD
) { 
 512                     start 
= trimFront(start
); 
 514                 auto caf 
= NumFieldUtils::expand(currField
); 
 515                 cfpos
.setState(caf
.category
, caf
.field
, start
, end
); 
 520         // Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER. 
 521         if (cfpos
.matchesField(UFIELD_CATEGORY_NUMBER
, UNUM_INTEGER_FIELD
) 
 523                 // don't return the same field twice in a row: 
 524                 && i 
- fZero 
> cfpos
.getLimit() 
 525                 && isIntOrGroup(getFieldPtr()[i 
- 1]) 
 526                 && !isIntOrGroup(_field
)) { 
 528             for (; j 
>= fZero 
&& isIntOrGroup(getFieldPtr()[j
]); j
--) {} 
 529             cfpos
.setState(UFIELD_CATEGORY_NUMBER
, UNUM_INTEGER_FIELD
, j 
- fZero 
+ 1, i 
- fZero
); 
 532         // Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC. 
 533         if (numericField 
!= 0 
 534                 && cfpos
.matchesField(numericCAF
.category
, numericCAF
.field
) 
 536                 // don't return the same field twice in a row: 
 537                 && (i 
- fZero 
> cfpos
.getLimit() 
 538                     || cfpos
.getCategory() != numericCAF
.category
 
 539                     || cfpos
.getField() != numericCAF
.field
) 
 540                 && isNumericField(getFieldPtr()[i 
- 1]) 
 541                 && !isNumericField(_field
)) { 
 543             for (; j 
>= fZero 
&& isNumericField(getFieldPtr()[j
]); j
--) {} 
 544             cfpos
.setState(numericCAF
.category
, numericCAF
.field
, j 
- fZero 
+ 1, i 
- fZero
); 
 547         // Special case: skip over INTEGER; will be coalesced later. 
 548         if (_field 
== UNUM_INTEGER_FIELD
) { 
 549             _field 
= UNUM_FIELD_COUNT
; 
 551         // Case 2: no field starting at this position. 
 552         if (_field 
== UNUM_FIELD_COUNT 
|| _field 
== kEndField
) { 
 555         // Case 3: check for field starting at this position 
 556         auto caf 
= NumFieldUtils::expand(_field
); 
 557         if (cfpos
.matchesField(caf
.category
, caf
.field
)) { 
 558             fieldStart 
= i 
- fZero
; 
 563     U_ASSERT(currField 
== UNUM_FIELD_COUNT
); 
 // Scans fieldAt(i) for every index below fLength, looking for an entry
 // equal to `field`.
 // NOTE(review): the return statements are not visible in this listing.
 567 bool NumberStringBuilder::containsField(Field field
) const { 
 568     for (int32_t i 
= 0; i 
< fLength
; i
++) { 
 569         if (field 
== fieldAt(i
)) { 
 576 bool NumberStringBuilder::isIntOrGroup(Field field
) { 
 577     return field 
== UNUM_INTEGER_FIELD
 
 578         || field 
== UNUM_GROUPING_SEPARATOR_FIELD
; 
 581 bool NumberStringBuilder::isNumericField(Field field
) { 
 582     return NumFieldUtils::isNumericField(field
); 
 // Returns the result of spanBack() on the unisets::DEFAULT_IGNORABLES set,
 // applied to the string starting at getCharPtr() + fZero with
 // USET_SPAN_CONTAINED.
 // NOTE(review): the length argument of spanBack is not visible in this
 // listing (presumably `limit` — confirm).
 585 int32_t NumberStringBuilder::trimBack(int32_t limit
) const { 
 586     return unisets::get(unisets::DEFAULT_IGNORABLES
)->spanBack( 
 587         getCharPtr() + fZero
, 
 589         USET_SPAN_CONTAINED
); 
 // Returns `start` plus the result of span() on the
 // unisets::DEFAULT_IGNORABLES set, applied to the string beginning at
 // getCharPtr() + fZero + start with USET_SPAN_CONTAINED.
 // NOTE(review): the length argument of span is not visible in this listing
 // (presumably the remaining length after `start` — confirm).
 592 int32_t NumberStringBuilder::trimFront(int32_t start
) const { 
 593     return start 
+ unisets::get(unisets::DEFAULT_IGNORABLES
)->span( 
 594         getCharPtr() + fZero 
+ start
, 
 596         USET_SPAN_CONTAINED
); 
 599 #endif /* #if !UCONFIG_NO_FORMATTING */