]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/number_stringbuilder.cpp
ICU-64232.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / number_stringbuilder.cpp
CommitLineData
0f5d89e8
A
1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include "unicode/utypes.h"
5
6#if !UCONFIG_NO_FORMATTING
7
8#include "number_stringbuilder.h"
3d1f044b 9#include "static_unicode_sets.h"
0f5d89e8 10#include "unicode/utf16.h"
3d1f044b 11#include "number_utils.h"
0f5d89e8
A
12
13using namespace icu;
14using namespace icu::number;
15using namespace icu::number::impl;
16
17namespace {
18
19// A version of uprv_memcpy that checks for length 0.
20// By default, uprv_memcpy requires a length of at least 1.
21inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
22 if (len > 0) {
23 uprv_memcpy(dest, src, len);
24 }
25}
26
27// A version of uprv_memmove that checks for length 0.
28// By default, uprv_memmove requires a length of at least 1.
29inline void uprv_memmove2(void* dest, const void* src, size_t len) {
30 if (len > 0) {
31 uprv_memmove(dest, src, len);
32 }
33}
34
35} // namespace
36
3d1f044b
A
37NumberStringBuilder::NumberStringBuilder() {
38#if U_DEBUG
39 // Initializing the memory to non-zero helps catch some bugs that involve
40 // reading from an improperly terminated string.
41 for (int32_t i=0; i<getCapacity(); i++) {
42 getCharPtr()[i] = 1;
43 }
44#endif
45}
0f5d89e8
A
46
47NumberStringBuilder::~NumberStringBuilder() {
48 if (fUsingHeap) {
49 uprv_free(fChars.heap.ptr);
50 uprv_free(fFields.heap.ptr);
51 }
52}
53
54NumberStringBuilder::NumberStringBuilder(const NumberStringBuilder &other) {
55 *this = other;
56}
57
58NumberStringBuilder &NumberStringBuilder::operator=(const NumberStringBuilder &other) {
59 // Check for self-assignment
60 if (this == &other) {
61 return *this;
62 }
63
64 // Continue with deallocation and copying
65 if (fUsingHeap) {
66 uprv_free(fChars.heap.ptr);
67 uprv_free(fFields.heap.ptr);
68 fUsingHeap = false;
69 }
70
71 int32_t capacity = other.getCapacity();
72 if (capacity > DEFAULT_CAPACITY) {
73 // FIXME: uprv_malloc
74 // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
75 auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity));
76 auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity));
77 if (newChars == nullptr || newFields == nullptr) {
78 // UErrorCode is not available; fail silently.
79 uprv_free(newChars);
80 uprv_free(newFields);
81 *this = NumberStringBuilder(); // can't fail
82 return *this;
83 }
84
85 fUsingHeap = true;
86 fChars.heap.capacity = capacity;
87 fChars.heap.ptr = newChars;
88 fFields.heap.capacity = capacity;
89 fFields.heap.ptr = newFields;
90 }
91
92 uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity);
93 uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity);
94
95 fZero = other.fZero;
96 fLength = other.fLength;
97 return *this;
98}
99
100int32_t NumberStringBuilder::length() const {
101 return fLength;
102}
103
104int32_t NumberStringBuilder::codePointCount() const {
105 return u_countChar32(getCharPtr() + fZero, fLength);
106}
107
108UChar32 NumberStringBuilder::getFirstCodePoint() const {
109 if (fLength == 0) {
110 return -1;
111 }
112 UChar32 cp;
113 U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
114 return cp;
115}
116
117UChar32 NumberStringBuilder::getLastCodePoint() const {
118 if (fLength == 0) {
119 return -1;
120 }
121 int32_t offset = fLength;
122 U16_BACK_1(getCharPtr() + fZero, 0, offset);
123 UChar32 cp;
124 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
125 return cp;
126}
127
128UChar32 NumberStringBuilder::codePointAt(int32_t index) const {
129 UChar32 cp;
130 U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
131 return cp;
132}
133
134UChar32 NumberStringBuilder::codePointBefore(int32_t index) const {
135 int32_t offset = index;
136 U16_BACK_1(getCharPtr() + fZero, 0, offset);
137 UChar32 cp;
138 U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
139 return cp;
140}
141
142NumberStringBuilder &NumberStringBuilder::clear() {
143 // TODO: Reset the heap here?
144 fZero = getCapacity() / 2;
145 fLength = 0;
146 return *this;
147}
148
149int32_t NumberStringBuilder::appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
150 return insertCodePoint(fLength, codePoint, field, status);
151}
152
153int32_t
154NumberStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
155 int32_t count = U16_LENGTH(codePoint);
156 int32_t position = prepareForInsert(index, count, status);
157 if (U_FAILURE(status)) {
158 return count;
159 }
160 if (count == 1) {
161 getCharPtr()[position] = (char16_t) codePoint;
162 getFieldPtr()[position] = field;
163 } else {
164 getCharPtr()[position] = U16_LEAD(codePoint);
165 getCharPtr()[position + 1] = U16_TRAIL(codePoint);
166 getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
167 }
168 return count;
169}
170
171int32_t NumberStringBuilder::append(const UnicodeString &unistr, Field field, UErrorCode &status) {
172 return insert(fLength, unistr, field, status);
173}
174
175int32_t NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
176 UErrorCode &status) {
177 if (unistr.length() == 0) {
178 // Nothing to insert.
179 return 0;
180 } else if (unistr.length() == 1) {
181 // Fast path: insert using insertCodePoint.
182 return insertCodePoint(index, unistr.charAt(0), field, status);
183 } else {
184 return insert(index, unistr, 0, unistr.length(), field, status);
185 }
186}
187
188int32_t
189NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
190 Field field, UErrorCode &status) {
191 int32_t count = end - start;
192 int32_t position = prepareForInsert(index, count, status);
193 if (U_FAILURE(status)) {
194 return count;
195 }
196 for (int32_t i = 0; i < count; i++) {
197 getCharPtr()[position + i] = unistr.charAt(start + i);
198 getFieldPtr()[position + i] = field;
199 }
200 return count;
201}
202
203int32_t
204NumberStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
205 int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
206 int32_t thisLength = endThis - startThis;
207 int32_t otherLength = endOther - startOther;
208 int32_t count = otherLength - thisLength;
209 int32_t position;
210 if (count > 0) {
211 // Overall, chars need to be added.
212 position = prepareForInsert(startThis, count, status);
213 } else {
214 // Overall, chars need to be removed or kept the same.
215 position = remove(startThis, -count);
216 }
217 if (U_FAILURE(status)) {
218 return count;
219 }
220 for (int32_t i = 0; i < otherLength; i++) {
221 getCharPtr()[position + i] = unistr.charAt(startOther + i);
222 getFieldPtr()[position + i] = field;
223 }
224 return count;
225}
226
227int32_t NumberStringBuilder::append(const NumberStringBuilder &other, UErrorCode &status) {
228 return insert(fLength, other, status);
229}
230
231int32_t
232NumberStringBuilder::insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status) {
233 if (this == &other) {
234 status = U_ILLEGAL_ARGUMENT_ERROR;
235 return 0;
236 }
237 int32_t count = other.fLength;
238 if (count == 0) {
239 // Nothing to insert.
240 return 0;
241 }
242 int32_t position = prepareForInsert(index, count, status);
243 if (U_FAILURE(status)) {
244 return count;
245 }
246 for (int32_t i = 0; i < count; i++) {
247 getCharPtr()[position + i] = other.charAt(i);
248 getFieldPtr()[position + i] = other.fieldAt(i);
249 }
250 return count;
251}
252
3d1f044b
A
253void NumberStringBuilder::writeTerminator(UErrorCode& status) {
254 int32_t position = prepareForInsert(fLength, 1, status);
255 if (U_FAILURE(status)) {
256 return;
257 }
258 getCharPtr()[position] = 0;
259 getFieldPtr()[position] = UNUM_FIELD_COUNT;
260 fLength--;
261}
262
0f5d89e8 263int32_t NumberStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
3d1f044b
A
264 U_ASSERT(index >= 0);
265 U_ASSERT(index <= fLength);
266 U_ASSERT(count >= 0);
0f5d89e8
A
267 if (index == 0 && fZero - count >= 0) {
268 // Append to start
269 fZero -= count;
270 fLength += count;
271 return fZero;
272 } else if (index == fLength && fZero + fLength + count < getCapacity()) {
273 // Append to end
274 fLength += count;
275 return fZero + fLength - count;
276 } else {
277 // Move chars around and/or allocate more space
278 return prepareForInsertHelper(index, count, status);
279 }
280}
281
282int32_t NumberStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
283 int32_t oldCapacity = getCapacity();
284 int32_t oldZero = fZero;
285 char16_t *oldChars = getCharPtr();
286 Field *oldFields = getFieldPtr();
287 if (fLength + count > oldCapacity) {
288 int32_t newCapacity = (fLength + count) * 2;
289 int32_t newZero = newCapacity / 2 - (fLength + count) / 2;
290
291 // C++ note: malloc appears in two places: here and in the assignment operator.
292 auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity));
293 auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity));
294 if (newChars == nullptr || newFields == nullptr) {
295 uprv_free(newChars);
296 uprv_free(newFields);
297 status = U_MEMORY_ALLOCATION_ERROR;
298 return -1;
299 }
300
301 // First copy the prefix and then the suffix, leaving room for the new chars that the
302 // caller wants to insert.
303 // C++ note: memcpy is OK because the src and dest do not overlap.
304 uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
305 uprv_memcpy2(newChars + newZero + index + count,
306 oldChars + oldZero + index,
307 sizeof(char16_t) * (fLength - index));
308 uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
309 uprv_memcpy2(newFields + newZero + index + count,
310 oldFields + oldZero + index,
311 sizeof(Field) * (fLength - index));
312
313 if (fUsingHeap) {
314 uprv_free(oldChars);
315 uprv_free(oldFields);
316 }
317 fUsingHeap = true;
318 fChars.heap.ptr = newChars;
319 fChars.heap.capacity = newCapacity;
320 fFields.heap.ptr = newFields;
321 fFields.heap.capacity = newCapacity;
322 fZero = newZero;
323 fLength += count;
324 } else {
325 int32_t newZero = oldCapacity / 2 - (fLength + count) / 2;
326
327 // C++ note: memmove is required because src and dest may overlap.
328 // First copy the entire string to the location of the prefix, and then move the suffix
329 // to make room for the new chars that the caller wants to insert.
330 uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
331 uprv_memmove2(oldChars + newZero + index + count,
332 oldChars + newZero + index,
333 sizeof(char16_t) * (fLength - index));
334 uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
335 uprv_memmove2(oldFields + newZero + index + count,
336 oldFields + newZero + index,
337 sizeof(Field) * (fLength - index));
338
339 fZero = newZero;
340 fLength += count;
341 }
342 return fZero + index;
343}
344
345int32_t NumberStringBuilder::remove(int32_t index, int32_t count) {
346 // TODO: Reset the heap here? (If the string after removal can fit on stack?)
347 int32_t position = index + fZero;
348 uprv_memmove2(getCharPtr() + position,
349 getCharPtr() + position + count,
350 sizeof(char16_t) * (fLength - index - count));
351 uprv_memmove2(getFieldPtr() + position,
352 getFieldPtr() + position + count,
353 sizeof(Field) * (fLength - index - count));
354 fLength -= count;
355 return position;
356}
357
358UnicodeString NumberStringBuilder::toUnicodeString() const {
359 return UnicodeString(getCharPtr() + fZero, fLength);
360}
361
362const UnicodeString NumberStringBuilder::toTempUnicodeString() const {
363 // Readonly-alias constructor:
364 return UnicodeString(FALSE, getCharPtr() + fZero, fLength);
365}
366
367UnicodeString NumberStringBuilder::toDebugString() const {
368 UnicodeString sb;
369 sb.append(u"<NumberStringBuilder [", -1);
370 sb.append(toUnicodeString());
371 sb.append(u"] [", -1);
372 for (int i = 0; i < fLength; i++) {
373 if (fieldAt(i) == UNUM_FIELD_COUNT) {
374 sb.append(u'n');
375 } else {
376 char16_t c;
377 switch (fieldAt(i)) {
378 case UNUM_SIGN_FIELD:
379 c = u'-';
380 break;
381 case UNUM_INTEGER_FIELD:
382 c = u'i';
383 break;
384 case UNUM_FRACTION_FIELD:
385 c = u'f';
386 break;
387 case UNUM_EXPONENT_FIELD:
388 c = u'e';
389 break;
390 case UNUM_EXPONENT_SIGN_FIELD:
391 c = u'+';
392 break;
393 case UNUM_EXPONENT_SYMBOL_FIELD:
394 c = u'E';
395 break;
396 case UNUM_DECIMAL_SEPARATOR_FIELD:
397 c = u'.';
398 break;
399 case UNUM_GROUPING_SEPARATOR_FIELD:
400 c = u',';
401 break;
402 case UNUM_PERCENT_FIELD:
403 c = u'%';
404 break;
405 case UNUM_PERMILL_FIELD:
406 c = u'‰';
407 break;
408 case UNUM_CURRENCY_FIELD:
409 c = u'$';
410 break;
411 default:
412 c = u'?';
413 break;
414 }
415 sb.append(c);
416 }
417 }
418 sb.append(u"]>", -1);
419 return sb;
420}
421
422const char16_t *NumberStringBuilder::chars() const {
423 return getCharPtr() + fZero;
424}
425
426bool NumberStringBuilder::contentEquals(const NumberStringBuilder &other) const {
427 if (fLength != other.fLength) {
428 return false;
429 }
430 for (int32_t i = 0; i < fLength; i++) {
431 if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
432 return false;
433 }
434 }
435 return true;
436}
437
438bool NumberStringBuilder::nextFieldPosition(FieldPosition& fp, UErrorCode& status) const {
439 int32_t rawField = fp.getField();
440
441 if (rawField == FieldPosition::DONT_CARE) {
442 return FALSE;
443 }
444
445 if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) {
446 status = U_ILLEGAL_ARGUMENT_ERROR;
447 return FALSE;
448 }
449
3d1f044b
A
450 ConstrainedFieldPosition cfpos;
451 cfpos.constrainField(UFIELD_CATEGORY_NUMBER, rawField);
452 cfpos.setState(UFIELD_CATEGORY_NUMBER, rawField, fp.getBeginIndex(), fp.getEndIndex());
453 if (nextPosition(cfpos, 0, status)) {
454 fp.setBeginIndex(cfpos.getStart());
455 fp.setEndIndex(cfpos.getLimit());
456 return true;
457 }
0f5d89e8 458
3d1f044b
A
459 // Special case: fraction should start after integer if fraction is not present
460 if (rawField == UNUM_FRACTION_FIELD && fp.getEndIndex() == 0) {
461 bool inside = false;
462 int32_t i = fZero;
463 for (; i < fZero + fLength; i++) {
464 if (isIntOrGroup(getFieldPtr()[i]) || getFieldPtr()[i] == UNUM_DECIMAL_SEPARATOR_FIELD) {
465 inside = true;
466 } else if (inside) {
467 break;
0f5d89e8 468 }
0f5d89e8 469 }
3d1f044b
A
470 fp.setBeginIndex(i - fZero);
471 fp.setEndIndex(i - fZero);
0f5d89e8
A
472 }
473
3d1f044b 474 return false;
0f5d89e8
A
475}
476
477void NumberStringBuilder::getAllFieldPositions(FieldPositionIteratorHandler& fpih,
478 UErrorCode& status) const {
3d1f044b
A
479 ConstrainedFieldPosition cfpos;
480 while (nextPosition(cfpos, 0, status)) {
481 fpih.addAttribute(cfpos.getField(), cfpos.getStart(), cfpos.getLimit());
482 }
483}
484
485// Signal the end of the string using a field that doesn't exist and that is
486// different from UNUM_FIELD_COUNT, which is used for "null number field".
487static constexpr Field kEndField = 0xff;
488
489bool NumberStringBuilder::nextPosition(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const {
490 auto numericCAF = NumFieldUtils::expand(numericField);
491 int32_t fieldStart = -1;
492 Field currField = UNUM_FIELD_COUNT;
493 for (int32_t i = fZero + cfpos.getLimit(); i <= fZero + fLength; i++) {
494 Field _field = (i < fZero + fLength) ? getFieldPtr()[i] : kEndField;
495 // Case 1: currently scanning a field.
496 if (currField != UNUM_FIELD_COUNT) {
497 if (currField != _field) {
498 int32_t end = i - fZero;
499 // Grouping separators can be whitespace; don't throw them out!
500 if (currField != UNUM_GROUPING_SEPARATOR_FIELD) {
501 end = trimBack(i - fZero);
502 }
503 if (end <= fieldStart) {
504 // Entire field position is ignorable; skip.
505 fieldStart = -1;
506 currField = UNUM_FIELD_COUNT;
507 i--; // look at this index again
508 continue;
509 }
510 int32_t start = fieldStart;
511 if (currField != UNUM_GROUPING_SEPARATOR_FIELD) {
512 start = trimFront(start);
513 }
514 auto caf = NumFieldUtils::expand(currField);
515 cfpos.setState(caf.category, caf.field, start, end);
516 return true;
0f5d89e8 517 }
3d1f044b
A
518 continue;
519 }
520 // Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER.
521 if (cfpos.matchesField(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)
522 && i > fZero
523 // don't return the same field twice in a row:
524 && i - fZero > cfpos.getLimit()
525 && isIntOrGroup(getFieldPtr()[i - 1])
526 && !isIntOrGroup(_field)) {
527 int j = i - 1;
528 for (; j >= fZero && isIntOrGroup(getFieldPtr()[j]); j--) {}
529 cfpos.setState(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD, j - fZero + 1, i - fZero);
530 return true;
0f5d89e8 531 }
3d1f044b
A
532 // Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC.
533 if (numericField != 0
534 && cfpos.matchesField(numericCAF.category, numericCAF.field)
535 && i > fZero
536 // don't return the same field twice in a row:
537 && (i - fZero > cfpos.getLimit()
538 || cfpos.getCategory() != numericCAF.category
539 || cfpos.getField() != numericCAF.field)
540 && isNumericField(getFieldPtr()[i - 1])
541 && !isNumericField(_field)) {
542 int j = i - 1;
543 for (; j >= fZero && isNumericField(getFieldPtr()[j]); j--) {}
544 cfpos.setState(numericCAF.category, numericCAF.field, j - fZero + 1, i - fZero);
545 return true;
546 }
547 // Special case: skip over INTEGER; will be coalesced later.
548 if (_field == UNUM_INTEGER_FIELD) {
549 _field = UNUM_FIELD_COUNT;
550 }
551 // Case 2: no field starting at this position.
552 if (_field == UNUM_FIELD_COUNT || _field == kEndField) {
553 continue;
554 }
555 // Case 3: check for field starting at this position
556 auto caf = NumFieldUtils::expand(_field);
557 if (cfpos.matchesField(caf.category, caf.field)) {
558 fieldStart = i - fZero;
559 currField = _field;
0f5d89e8
A
560 }
561 }
3d1f044b
A
562
563 U_ASSERT(currField == UNUM_FIELD_COUNT);
564 return false;
565}
566
567bool NumberStringBuilder::containsField(Field field) const {
568 for (int32_t i = 0; i < fLength; i++) {
569 if (field == fieldAt(i)) {
570 return true;
571 }
0f5d89e8 572 }
3d1f044b
A
573 return false;
574}
575
576bool NumberStringBuilder::isIntOrGroup(Field field) {
577 return field == UNUM_INTEGER_FIELD
578 || field == UNUM_GROUPING_SEPARATOR_FIELD;
579}
580
581bool NumberStringBuilder::isNumericField(Field field) {
582 return NumFieldUtils::isNumericField(field);
583}
584
585int32_t NumberStringBuilder::trimBack(int32_t limit) const {
586 return unisets::get(unisets::DEFAULT_IGNORABLES)->spanBack(
587 getCharPtr() + fZero,
588 limit,
589 USET_SPAN_CONTAINED);
590}
591
592int32_t NumberStringBuilder::trimFront(int32_t start) const {
593 return start + unisets::get(unisets::DEFAULT_IGNORABLES)->span(
594 getCharPtr() + fZero + start,
595 fLength - start,
596 USET_SPAN_CONTAINED);
0f5d89e8
A
597}
598
599#endif /* #if !UCONFIG_NO_FORMATTING */