]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/i18n/listformatter.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / i18n / listformatter.cpp
... / ...
CommitLineData
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2013-2016, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: listformatter.cpp
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2012aug27
16* created by: Umesh P. Nair
17*/
18
19#include "cmemory.h"
20#include "unicode/fpositer.h" // FieldPositionIterator
21#include "unicode/listformatter.h"
22#include "unicode/simpleformatter.h"
23#include "unicode/ulistformatter.h"
24#include "unicode/uscript.h"
25#include "fphdlimp.h"
26#include "mutex.h"
27#include "hash.h"
28#include "cstring.h"
29#include "uarrsort.h"
30#include "ulocimp.h"
31#include "charstr.h"
32#include "ucln_in.h"
33#include "uresimp.h"
34#include "resource.h"
35#include "formattedval_impl.h"
36
37U_NAMESPACE_BEGIN
38
39namespace {
40
41class PatternHandler : public UObject {
42public:
43 PatternHandler(const UnicodeString& two, const UnicodeString& end, UErrorCode& errorCode) :
44 twoPattern(two, 2, 2, errorCode),
45 endPattern(end, 2, 2, errorCode) { }
46
47 PatternHandler(const SimpleFormatter& two, const SimpleFormatter& end) :
48 twoPattern(two),
49 endPattern(end) { }
50
51 virtual ~PatternHandler();
52
53 virtual PatternHandler* clone() const { return new PatternHandler(twoPattern, endPattern); }
54
55 virtual const SimpleFormatter& getTwoPattern(const UnicodeString&, const UnicodeString&) const {
56 return twoPattern;
57 }
58
59 virtual const SimpleFormatter& getEndPattern(const UnicodeString&) const {
60 return endPattern;
61 }
62
63protected:
64 SimpleFormatter twoPattern;
65 SimpleFormatter endPattern;
66};
67
68PatternHandler::~PatternHandler() {
69}
70
71class ContextualHandler : public PatternHandler {
72public:
73 ContextualHandler(bool (*testFunc)(const UnicodeString& text),
74 const UnicodeString& thenTwo,
75 const UnicodeString& elseTwo,
76 const UnicodeString& thenEnd,
77 const UnicodeString& elseEnd,
78 UErrorCode& errorCode) :
79 PatternHandler(elseTwo, elseEnd, errorCode),
80 test(testFunc),
81 thenTwoPattern(thenTwo, 2, 2, errorCode),
82 thenEndPattern(thenEnd, 2, 2, errorCode) { }
83
84 ContextualHandler(bool (*testFunc)(const UnicodeString& text),
85 const SimpleFormatter& thenTwo, SimpleFormatter elseTwo,
86 const SimpleFormatter& thenEnd, SimpleFormatter elseEnd) :
87 PatternHandler(elseTwo, elseEnd),
88 test(testFunc),
89 thenTwoPattern(thenTwo),
90 thenEndPattern(thenEnd) { }
91
92 ~ContextualHandler() override;
93
94 PatternHandler* clone() const override {
95 return new ContextualHandler(
96 test, thenTwoPattern, twoPattern, thenEndPattern, endPattern);
97 }
98
99 const SimpleFormatter& getTwoPattern(
100 const UnicodeString&, /*ignored*/
101 const UnicodeString& text) const override {
102 return (test)(text) ? thenTwoPattern : twoPattern;
103 }
104
105 const SimpleFormatter& getEndPattern(
106 const UnicodeString& text) const override {
107 return (test)(text) ? thenEndPattern : endPattern;
108 }
109
110private:
111 bool (*test)(const UnicodeString&);
112 SimpleFormatter thenTwoPattern;
113 SimpleFormatter thenEndPattern;
114};
115
116ContextualHandler::~ContextualHandler() {
117}
118
119class ThaiHandler : public PatternHandler {
120public:
121 ThaiHandler(const UnicodeString& two, const UnicodeString& end, UErrorCode& errorCode) :
122 PatternHandler(two, end, errorCode),
123 twoPatternText(two),
124 endPatternText(end),
125 spaceTwoPattern(),
126 twoSpacePattern(),
127 spaceTwoSpacePattern(),
128 spaceEndPattern() {
129 bool needToDeleteSpaceAfter0 = false;
130 UnicodeString tempPattern = two;
131 if (tempPattern.indexOf(UnicodeString(u"{0} ")) < 0) {
132 tempPattern.findAndReplace(UnicodeString(u"{0}"), UnicodeString(u"{0} "));
133 needToDeleteSpaceAfter0 = true;
134 }
135 spaceTwoPattern = SimpleFormatter(tempPattern, 2, 2, errorCode);
136 if (tempPattern.indexOf(UnicodeString(u" {1}")) < 0) {
137 tempPattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(u" {1}"));
138 }
139 spaceTwoSpacePattern = SimpleFormatter(tempPattern, 2, 2, errorCode);
140 if (needToDeleteSpaceAfter0) {
141 tempPattern.findAndReplace(UnicodeString(u"{0} "), UnicodeString(u"{0}"));
142 }
143 twoSpacePattern = SimpleFormatter(tempPattern, 2, 2, errorCode);
144
145 tempPattern = end;
146 if (tempPattern.indexOf(UnicodeString(u" {1}")) < 0) {
147 tempPattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(u" {1}"));
148 }
149 spaceEndPattern = SimpleFormatter(tempPattern, 2, 2, errorCode);
150 }
151
152 ~ThaiHandler() override;
153
154 PatternHandler* clone() const override {
155 UErrorCode dummyErr = U_ZERO_ERROR;
156 return new ThaiHandler(twoPatternText, endPatternText, dummyErr);
157 }
158
159 const SimpleFormatter& getTwoPattern(
160 const UnicodeString& textBefore,
161 const UnicodeString& textAfter) const override {
162 UErrorCode err = U_ZERO_ERROR;
163 bool insertSpaceBefore = !textBefore.isEmpty() && uscript_getScript(textBefore[textBefore.length() - 1], &err) != USCRIPT_THAI;
164 bool insertSpaceAfter = !textAfter.isEmpty() && uscript_getScript(textAfter[0], &err) != USCRIPT_THAI;
165
166 if (insertSpaceBefore) {
167 return insertSpaceAfter ? spaceTwoSpacePattern : spaceTwoPattern;
168 } else {
169 return insertSpaceAfter ? twoSpacePattern : twoPattern;
170 }
171 }
172
173 const SimpleFormatter& getEndPattern(
174 const UnicodeString& text) const override {
175 UErrorCode err = U_ZERO_ERROR;
176 if (!text.isEmpty() && uscript_getScript(text[0], &err) != USCRIPT_THAI) {
177 return spaceEndPattern;
178 } else {
179 return endPattern;
180 }
181 }
182
183private:
184 UnicodeString twoPatternText;
185 UnicodeString endPatternText;
186 SimpleFormatter spaceTwoPattern;
187 SimpleFormatter twoSpacePattern;
188 SimpleFormatter spaceTwoSpacePattern;
189 SimpleFormatter spaceEndPattern;
190};
191
192ThaiHandler::~ThaiHandler() {
193}
194
// Pattern strings that trigger, or are substituted by, the language-specific
// handlers below. Both the characters and the pointers themselves are const:
// nothing may re-point these at another string.
static const char16_t * const spanishY = u"{0} y {1}";
static const char16_t * const spanishE = u"{0} e {1}";
static const char16_t * const spanishO = u"{0} o {1}";
static const char16_t * const spanishU = u"{0} u {1}";
static const char16_t * const hebrewVav = u"{0} \u05D5{1}";
static const char16_t * const hebrewVavDash = u"{0} \u05D5-{1}";
201
202// Condiction to change to e.
203// Starts with "hi" or "i" but not with "hie" nor "hia"
204static bool shouldChangeToE(const UnicodeString& text) {
205 int32_t len = text.length();
206 if (len == 0) { return false; }
207 // Case insensitive match hi but not hie nor hia.
208 if ((text[0] == u'h' || text[0] == u'H') &&
209 ((len > 1) && (text[1] == u'i' || text[1] == u'I')) &&
210 ((len == 2) || !(text[2] == u'a' || text[2] == u'A' || text[2] == u'e' || text[2] == u'E'))) {
211 return true;
212 }
213 // Case insensitive for "start with i"
214 if (text[0] == u'i' || text[0] == u'I') { return true; }
215 return false;
216}
217
218// Condiction to change to u.
219// Starts with "o", "ho", and "8". Also "11" by itself.
220// re: ^((o|ho|8).*|11)$
221static bool shouldChangeToU(const UnicodeString& text) {
222 int32_t len = text.length();
223 if (len == 0) { return false; }
224 // Case insensitive match o.* and 8.*
225 if (text[0] == u'o' || text[0] == u'O' || text[0] == u'8') { return true; }
226 // Case insensitive match ho.*
227 if ((text[0] == u'h' || text[0] == u'H') &&
228 ((len > 1) && (text[1] == 'o' || text[1] == u'O'))) {
229 return true;
230 }
231 // match "^11$" and "^11 .*"
232 if ((len >= 2) && text[0] == u'1' && text[1] == u'1' && (len == 2 || text[2] == u' ')) { return true; }
233 return false;
234}
235
236// Condiction to change to VAV follow by a dash.
237// Starts with non Hebrew letter.
238static bool shouldChangeToVavDash(const UnicodeString& text) {
239 if (text.isEmpty()) { return false; }
240 UErrorCode status = U_ZERO_ERROR;
241 return uscript_getScript(text.char32At(0), &status) != USCRIPT_HEBREW;
242}
243
244PatternHandler* createPatternHandler(
245 const char* lang, const UnicodeString& two, const UnicodeString& end,
246 UErrorCode& status) {
247 if (uprv_strcmp(lang, "es") == 0) {
248 // Spanish
249 UnicodeString spanishYStr(TRUE, spanishY, -1);
250 bool twoIsY = two == spanishYStr;
251 bool endIsY = end == spanishYStr;
252 if (twoIsY || endIsY) {
253 UnicodeString replacement(TRUE, spanishE, -1);
254 return new ContextualHandler(
255 shouldChangeToE,
256 twoIsY ? replacement : two, two,
257 endIsY ? replacement : end, end, status);
258 }
259 UnicodeString spanishOStr(TRUE, spanishO, -1);
260 bool twoIsO = two == spanishOStr;
261 bool endIsO = end == spanishOStr;
262 if (twoIsO || endIsO) {
263 UnicodeString replacement(TRUE, spanishU, -1);
264 return new ContextualHandler(
265 shouldChangeToU,
266 twoIsO ? replacement : two, two,
267 endIsO ? replacement : end, end, status);
268 }
269 } else if (uprv_strcmp(lang, "he") == 0 || uprv_strcmp(lang, "iw") == 0) {
270 // Hebrew
271 UnicodeString hebrewVavStr(TRUE, hebrewVav, -1);
272 bool twoIsVav = two == hebrewVavStr;
273 bool endIsVav = end == hebrewVavStr;
274 if (twoIsVav || endIsVav) {
275 UnicodeString replacement(TRUE, hebrewVavDash, -1);
276 return new ContextualHandler(
277 shouldChangeToVavDash,
278 twoIsVav ? replacement : two, two,
279 endIsVav ? replacement : end, end, status);
280 }
281 } else if (uprv_strcmp(lang, "th") == 0) {
282 return new ThaiHandler(two, end, status);
283 }
284 return new PatternHandler(two, end, status);
285}
286
287} // namespace
288
289struct ListFormatInternal : public UMemory {
290 SimpleFormatter startPattern;
291 SimpleFormatter middlePattern;
292 LocalPointer<PatternHandler> patternHandler;
293
294ListFormatInternal(
295 const UnicodeString& two,
296 const UnicodeString& start,
297 const UnicodeString& middle,
298 const UnicodeString& end,
299 const Locale& locale,
300 UErrorCode &errorCode) :
301 startPattern(start, 2, 2, errorCode),
302 middlePattern(middle, 2, 2, errorCode),
303 patternHandler(createPatternHandler(locale.getLanguage(), two, end, errorCode), errorCode) { }
304
305ListFormatInternal(const ListFormatData &data, UErrorCode &errorCode) :
306 startPattern(data.startPattern, errorCode),
307 middlePattern(data.middlePattern, errorCode),
308 patternHandler(createPatternHandler(
309 data.locale.getLanguage(), data.twoPattern, data.endPattern, errorCode), errorCode) { }
310
311ListFormatInternal(const ListFormatInternal &other) :
312 startPattern(other.startPattern),
313 middlePattern(other.middlePattern),
314 patternHandler(other.patternHandler->clone()) { }
315};
316
317
318#if !UCONFIG_NO_FORMATTING
319class FormattedListData : public FormattedValueFieldPositionIteratorImpl {
320public:
321 FormattedListData(UErrorCode& status) : FormattedValueFieldPositionIteratorImpl(5, status) {}
322 virtual ~FormattedListData();
323};
324
325FormattedListData::~FormattedListData() = default;
326
327UPRV_FORMATTED_VALUE_SUBCLASS_AUTO_IMPL(FormattedList)
328#endif
329
330
331static Hashtable* listPatternHash = nullptr;
332
333U_CDECL_BEGIN
334static UBool U_CALLCONV uprv_listformatter_cleanup() {
335 delete listPatternHash;
336 listPatternHash = nullptr;
337 return TRUE;
338}
339
340static void U_CALLCONV
341uprv_deleteListFormatInternal(void *obj) {
342 delete static_cast<ListFormatInternal *>(obj);
343}
344
345U_CDECL_END
346
347ListFormatter::ListFormatter(const ListFormatter& other) :
348 owned(other.owned), data(other.data) {
349 if (other.owned != nullptr) {
350 owned = new ListFormatInternal(*other.owned);
351 data = owned;
352 }
353}
354
355ListFormatter& ListFormatter::operator=(const ListFormatter& other) {
356 if (this == &other) {
357 return *this;
358 }
359 delete owned;
360 if (other.owned) {
361 owned = new ListFormatInternal(*other.owned);
362 data = owned;
363 } else {
364 owned = nullptr;
365 data = other.data;
366 }
367 return *this;
368}
369
370void ListFormatter::initializeHash(UErrorCode& errorCode) {
371 if (U_FAILURE(errorCode)) {
372 return;
373 }
374
375 listPatternHash = new Hashtable();
376 if (listPatternHash == nullptr) {
377 errorCode = U_MEMORY_ALLOCATION_ERROR;
378 return;
379 }
380
381 listPatternHash->setValueDeleter(uprv_deleteListFormatInternal);
382 ucln_i18n_registerCleanup(UCLN_I18N_LIST_FORMATTER, uprv_listformatter_cleanup);
383
384}
385
386const ListFormatInternal* ListFormatter::getListFormatInternal(
387 const Locale& locale, const char *style, UErrorCode& errorCode) {
388 if (U_FAILURE(errorCode)) {
389 return nullptr;
390 }
391 CharString keyBuffer(locale.getName(), errorCode);
392 keyBuffer.append(':', errorCode).append(style, errorCode);
393 UnicodeString key(keyBuffer.data(), -1, US_INV);
394 ListFormatInternal* result = nullptr;
395 static UMutex listFormatterMutex;
396 {
397 Mutex m(&listFormatterMutex);
398 if (listPatternHash == nullptr) {
399 initializeHash(errorCode);
400 if (U_FAILURE(errorCode)) {
401 return nullptr;
402 }
403 }
404 result = static_cast<ListFormatInternal*>(listPatternHash->get(key));
405 }
406 if (result != nullptr) {
407 return result;
408 }
409 result = loadListFormatInternal(locale, style, errorCode);
410 if (U_FAILURE(errorCode)) {
411 return nullptr;
412 }
413
414 {
415 Mutex m(&listFormatterMutex);
416 ListFormatInternal* temp = static_cast<ListFormatInternal*>(listPatternHash->get(key));
417 if (temp != nullptr) {
418 delete result;
419 result = temp;
420 } else {
421 listPatternHash->put(key, result, errorCode);
422 if (U_FAILURE(errorCode)) {
423 return nullptr;
424 }
425 }
426 }
427 return result;
428}
429
430#if !UCONFIG_NO_FORMATTING
431static const char* typeWidthToStyleString(UListFormatterType type, UListFormatterWidth width) {
432 switch (type) {
433 case ULISTFMT_TYPE_AND:
434 switch (width) {
435 case ULISTFMT_WIDTH_WIDE:
436 return "standard";
437 case ULISTFMT_WIDTH_SHORT:
438 return "standard-short";
439 case ULISTFMT_WIDTH_NARROW:
440 return "standard-narrow";
441 default:
442 return nullptr;
443 }
444 break;
445
446 case ULISTFMT_TYPE_OR:
447 switch (width) {
448 case ULISTFMT_WIDTH_WIDE:
449 return "or";
450 case ULISTFMT_WIDTH_SHORT:
451 return "or-short";
452 case ULISTFMT_WIDTH_NARROW:
453 return "or-narrow";
454 default:
455 return nullptr;
456 }
457 break;
458
459 case ULISTFMT_TYPE_UNITS:
460 switch (width) {
461 case ULISTFMT_WIDTH_WIDE:
462 return "unit";
463 case ULISTFMT_WIDTH_SHORT:
464 return "unit-short";
465 case ULISTFMT_WIDTH_NARROW:
466 return "unit-narrow";
467 default:
468 return nullptr;
469 }
470 }
471
472 return nullptr;
473}
474#endif
475
476static const UChar solidus = 0x2F;
477static const UChar aliasPrefix[] = { 0x6C,0x69,0x73,0x74,0x50,0x61,0x74,0x74,0x65,0x72,0x6E,0x2F }; // "listPattern/"
478enum {
479 kAliasPrefixLen = UPRV_LENGTHOF(aliasPrefix),
480 kStyleLenMax = 24 // longest currently is 14
481};
482
483struct ListFormatter::ListPatternsSink : public ResourceSink {
484 UnicodeString two, start, middle, end;
485#if ((U_PLATFORM == U_PF_AIX) || (U_PLATFORM == U_PF_OS390)) && (U_CPLUSPLUS_VERSION < 11)
486 char aliasedStyle[kStyleLenMax+1];
487 ListPatternsSink() {
488 uprv_memset(aliasedStyle, 0, kStyleLenMax+1);
489 }
490#else
491 char aliasedStyle[kStyleLenMax+1] = {0};
492
493 ListPatternsSink() {}
494#endif
495 virtual ~ListPatternsSink();
496
497 void setAliasedStyle(UnicodeString alias) {
498 int32_t startIndex = alias.indexOf(aliasPrefix, kAliasPrefixLen, 0);
499 if (startIndex < 0) {
500 return;
501 }
502 startIndex += kAliasPrefixLen;
503 int32_t endIndex = alias.indexOf(solidus, startIndex);
504 if (endIndex < 0) {
505 endIndex = alias.length();
506 }
507 alias.extract(startIndex, endIndex-startIndex, aliasedStyle, kStyleLenMax+1, US_INV);
508 aliasedStyle[kStyleLenMax] = 0;
509 }
510
511 void handleValueForPattern(ResourceValue &value, UnicodeString &pattern, UErrorCode &errorCode) {
512 if (pattern.isEmpty()) {
513 if (value.getType() == URES_ALIAS) {
514 if (aliasedStyle[0] == 0) {
515 setAliasedStyle(value.getAliasUnicodeString(errorCode));
516 }
517 } else {
518 pattern = value.getUnicodeString(errorCode);
519 }
520 }
521 }
522
523 virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/,
524 UErrorCode &errorCode) {
525 aliasedStyle[0] = 0;
526 if (value.getType() == URES_ALIAS) {
527 setAliasedStyle(value.getAliasUnicodeString(errorCode));
528 return;
529 }
530 ResourceTable listPatterns = value.getTable(errorCode);
531 for (int i = 0; U_SUCCESS(errorCode) && listPatterns.getKeyAndValue(i, key, value); ++i) {
532 if (uprv_strcmp(key, "2") == 0) {
533 handleValueForPattern(value, two, errorCode);
534 } else if (uprv_strcmp(key, "end") == 0) {
535 handleValueForPattern(value, end, errorCode);
536 } else if (uprv_strcmp(key, "middle") == 0) {
537 handleValueForPattern(value, middle, errorCode);
538 } else if (uprv_strcmp(key, "start") == 0) {
539 handleValueForPattern(value, start, errorCode);
540 }
541 }
542 }
543};
544
545// Virtual destructors must be defined out of line.
546ListFormatter::ListPatternsSink::~ListPatternsSink() {}
547
548ListFormatInternal* ListFormatter::loadListFormatInternal(
549 const Locale& locale, const char * style, UErrorCode& errorCode) {
550 UResourceBundle* rb = ures_open(nullptr, locale.getName(), &errorCode);
551 rb = ures_getByKeyWithFallback(rb, "listPattern", rb, &errorCode);
552 if (U_FAILURE(errorCode)) {
553 ures_close(rb);
554 return nullptr;
555 }
556 ListFormatter::ListPatternsSink sink;
557 char currentStyle[kStyleLenMax+1];
558 uprv_strncpy(currentStyle, style, kStyleLenMax);
559 currentStyle[kStyleLenMax] = 0;
560
561 for (;;) {
562 ures_getAllItemsWithFallback(rb, currentStyle, sink, errorCode);
563 if (U_FAILURE(errorCode) || sink.aliasedStyle[0] == 0 || uprv_strcmp(currentStyle, sink.aliasedStyle) == 0) {
564 break;
565 }
566 uprv_strcpy(currentStyle, sink.aliasedStyle);
567 }
568 ures_close(rb);
569 if (U_FAILURE(errorCode)) {
570 return nullptr;
571 }
572 if (sink.two.isEmpty() || sink.start.isEmpty() || sink.middle.isEmpty() || sink.end.isEmpty()) {
573 errorCode = U_MISSING_RESOURCE_ERROR;
574 return nullptr;
575 }
576
577 ListFormatInternal* result = new ListFormatInternal(sink.two, sink.start, sink.middle, sink.end, locale, errorCode);
578 if (result == nullptr) {
579 errorCode = U_MEMORY_ALLOCATION_ERROR;
580 return nullptr;
581 }
582 if (U_FAILURE(errorCode)) {
583 delete result;
584 return nullptr;
585 }
586 return result;
587}
588
589ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) {
590 Locale locale; // The default locale.
591 return createInstance(locale, errorCode);
592}
593
594ListFormatter* ListFormatter::createInstance(const Locale& locale, UErrorCode& errorCode) {
595#if !UCONFIG_NO_FORMATTING
596 return createInstance(locale, ULISTFMT_TYPE_AND, ULISTFMT_WIDTH_WIDE, errorCode);
597#else
598 return createInstance(locale, "standard", errorCode);
599#endif
600}
601
602#if !UCONFIG_NO_FORMATTING
603ListFormatter* ListFormatter::createInstance(
604 const Locale& locale, UListFormatterType type, UListFormatterWidth width, UErrorCode& errorCode) {
605 const char* style = typeWidthToStyleString(type, width);
606 if (style == nullptr) {
607 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
608 return nullptr;
609 }
610 return createInstance(locale, style, errorCode);
611}
612#endif
613
614ListFormatter* ListFormatter::createInstance(const Locale& locale, const char *style, UErrorCode& errorCode) {
615 const ListFormatInternal* listFormatInternal = getListFormatInternal(locale, style, errorCode);
616 if (U_FAILURE(errorCode)) {
617 return nullptr;
618 }
619 ListFormatter* p = new ListFormatter(listFormatInternal);
620 if (p == nullptr) {
621 errorCode = U_MEMORY_ALLOCATION_ERROR;
622 return nullptr;
623 }
624 return p;
625}
626
627ListFormatter::ListFormatter(const ListFormatData& listFormatData, UErrorCode &errorCode) {
628 owned = new ListFormatInternal(listFormatData, errorCode);
629 data = owned;
630}
631
632ListFormatter::ListFormatter(const ListFormatInternal* listFormatterInternal) : owned(nullptr), data(listFormatterInternal) {
633}
634
635ListFormatter::~ListFormatter() {
636 delete owned;
637}
638
639/**
640 * Joins first and second using the pattern pat.
641 * On entry offset is an offset into first or -1 if offset unspecified.
642 * On exit offset is offset of second in result if recordOffset was set
643 * Otherwise if it was >=0 it is set to point into result where it used
644 * to point into first. On exit, result is the join of first and second
645 * according to pat. Any previous value of result gets replaced.
646 */
647static void joinStringsAndReplace(
648 const SimpleFormatter& pat,
649 const UnicodeString& first,
650 const UnicodeString& second,
651 UnicodeString &result,
652 UBool recordOffset,
653 int32_t &offset,
654 int32_t *offsetFirst,
655 int32_t *offsetSecond,
656 UErrorCode& errorCode) {
657 if (U_FAILURE(errorCode)) {
658 return;
659 }
660 const UnicodeString *params[2] = {&first, &second};
661 int32_t offsets[2];
662 pat.formatAndReplace(
663 params,
664 UPRV_LENGTHOF(params),
665 result,
666 offsets,
667 UPRV_LENGTHOF(offsets),
668 errorCode);
669 if (U_FAILURE(errorCode)) {
670 return;
671 }
672 if (offsets[0] == -1 || offsets[1] == -1) {
673 errorCode = U_INVALID_FORMAT_ERROR;
674 return;
675 }
676 if (recordOffset) {
677 offset = offsets[1];
678 } else if (offset >= 0) {
679 offset += offsets[0];
680 }
681 if (offsetFirst != nullptr) *offsetFirst = offsets[0];
682 if (offsetSecond != nullptr) *offsetSecond = offsets[1];
683}
684
685UnicodeString& ListFormatter::format(
686 const UnicodeString items[],
687 int32_t nItems,
688 UnicodeString& appendTo,
689 UErrorCode& errorCode) const {
690 int32_t offset;
691 return format(items, nItems, appendTo, -1, offset, errorCode);
692}
693
694#if !UCONFIG_NO_FORMATTING
695UnicodeString& ListFormatter::format(
696 const UnicodeString items[],
697 int32_t nItems,
698 UnicodeString & appendTo,
699 FieldPositionIterator* posIter,
700 UErrorCode& errorCode) const {
701 int32_t offset;
702 FieldPositionIteratorHandler handler(posIter, errorCode);
703 return format_(items, nItems, appendTo, -1, offset, &handler, errorCode);
704}
705#endif
706
707UnicodeString& ListFormatter::format(
708 const UnicodeString items[],
709 int32_t nItems,
710 UnicodeString& appendTo,
711 int32_t index,
712 int32_t &offset,
713 UErrorCode& errorCode) const {
714 return format_(items, nItems, appendTo, index, offset, nullptr, errorCode);
715}
716
717#if !UCONFIG_NO_FORMATTING
718FormattedList ListFormatter::formatStringsToValue(
719 const UnicodeString items[],
720 int32_t nItems,
721 UErrorCode& errorCode) const {
722 LocalPointer<FormattedListData> result(new FormattedListData(errorCode), errorCode);
723 if (U_FAILURE(errorCode)) {
724 return FormattedList(errorCode);
725 }
726 UnicodeString string;
727 int32_t offset;
728 auto handler = result->getHandler(errorCode);
729 handler.setCategory(UFIELD_CATEGORY_LIST);
730 format_(items, nItems, string, -1, offset, &handler, errorCode);
731 handler.getError(errorCode);
732 result->appendString(string, errorCode);
733 if (U_FAILURE(errorCode)) {
734 return FormattedList(errorCode);
735 }
736
737 // Add span fields and sort
738 ConstrainedFieldPosition cfpos;
739 cfpos.constrainField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD);
740 int32_t i = 0;
741 handler.setCategory(UFIELD_CATEGORY_LIST_SPAN);
742 while (result->nextPosition(cfpos, errorCode)) {
743 handler.addAttribute(i++, cfpos.getStart(), cfpos.getLimit());
744 }
745 handler.getError(errorCode);
746 if (U_FAILURE(errorCode)) {
747 return FormattedList(errorCode);
748 }
749 result->sort();
750
751 return FormattedList(result.orphan());
752}
753#endif
754
755UnicodeString& ListFormatter::format_(
756 const UnicodeString items[],
757 int32_t nItems,
758 UnicodeString& appendTo,
759 int32_t index,
760 int32_t &offset,
761 FieldPositionHandler* handler,
762 UErrorCode& errorCode) const {
763#if !UCONFIG_NO_FORMATTING
764 offset = -1;
765 if (U_FAILURE(errorCode)) {
766 return appendTo;
767 }
768 if (data == nullptr) {
769 errorCode = U_INVALID_STATE_ERROR;
770 return appendTo;
771 }
772
773 if (nItems <= 0) {
774 return appendTo;
775 }
776 if (nItems == 1) {
777 if (index == 0) {
778 offset = appendTo.length();
779 }
780 if (handler != nullptr) {
781 handler->addAttribute(ULISTFMT_ELEMENT_FIELD,
782 appendTo.length(),
783 appendTo.length() + items[0].length());
784 }
785 appendTo.append(items[0]);
786 return appendTo;
787 }
788 UnicodeString result(items[0]);
789 if (index == 0) {
790 offset = 0;
791 }
792 int32_t offsetFirst = 0;
793 int32_t offsetSecond = 0;
794 int32_t prefixLength = 0;
795 // for n items, there are 2 * (n + 1) boundary including 0 and the upper
796 // edge.
797 MaybeStackArray<int32_t, 10> offsets((handler != nullptr) ? 2 * (nItems + 1): 0);
798 if (nItems == 2) {
799 joinStringsAndReplace(
800 data->patternHandler->getTwoPattern(items[0], items[1]),
801 result,
802 items[1],
803 result,
804 index == 1,
805 offset,
806 &offsetFirst,
807 &offsetSecond,
808 errorCode);
809 } else {
810 joinStringsAndReplace(
811 data->startPattern,
812 result,
813 items[1],
814 result,
815 index == 1,
816 offset,
817 &offsetFirst,
818 &offsetSecond,
819 errorCode);
820 }
821 if (handler != nullptr) {
822 offsets[0] = 0;
823 prefixLength += offsetFirst;
824 offsets[1] = offsetSecond - prefixLength;
825 }
826 if (nItems > 2) {
827 for (int32_t i = 2; i < nItems - 1; ++i) {
828 joinStringsAndReplace(
829 data->middlePattern,
830 result,
831 items[i],
832 result,
833 index == i,
834 offset,
835 &offsetFirst,
836 &offsetSecond,
837 errorCode);
838 if (handler != nullptr) {
839 prefixLength += offsetFirst;
840 offsets[i] = offsetSecond - prefixLength;
841 }
842 }
843 joinStringsAndReplace(
844 data->patternHandler->getEndPattern(items[nItems - 1]),
845 result,
846 items[nItems - 1],
847 result,
848 index == nItems - 1,
849 offset,
850 &offsetFirst,
851 &offsetSecond,
852 errorCode);
853 if (handler != nullptr) {
854 prefixLength += offsetFirst;
855 offsets[nItems - 1] = offsetSecond - prefixLength;
856 }
857 }
858 if (handler != nullptr) {
859 // If there are already some data in appendTo, we need to adjust the index
860 // by shifting that lenght while insert into handler.
861 int32_t shift = appendTo.length() + prefixLength;
862 // Output the ULISTFMT_ELEMENT_FIELD in the order of the input elements
863 for (int32_t i = 0; i < nItems; ++i) {
864 offsets[i + nItems] = offsets[i] + items[i].length() + shift;
865 offsets[i] += shift;
866 handler->addAttribute(
867 ULISTFMT_ELEMENT_FIELD, // id
868 offsets[i], // index
869 offsets[i + nItems]); // limit
870 }
871 // The locale pattern may reorder the items (such as in ur-IN locale),
872 // so we cannot assume the array is in accendning order.
873 // To handle the edging case, just insert the two ends into the array
874 // and sort. Then we output ULISTFMT_LITERAL_FIELD if the indecies
875 // between the even and odd position are not the same in the sorted array.
876 offsets[2 * nItems] = shift - prefixLength;
877 offsets[2 * nItems + 1] = result.length() + shift - prefixLength;
878 uprv_sortArray(offsets.getAlias(), 2 * (nItems + 1), sizeof(int32_t),
879 uprv_int32Comparator, nullptr,
880 false, &errorCode);
881 for (int32_t i = 0; i <= nItems; ++i) {
882 if (offsets[i * 2] != offsets[i * 2 + 1]) {
883 handler->addAttribute(
884 ULISTFMT_LITERAL_FIELD, // id
885 offsets[i * 2], // index
886 offsets[i * 2 + 1]); // limit
887 }
888 }
889 }
890 if (U_SUCCESS(errorCode)) {
891 if (offset >= 0) {
892 offset += appendTo.length();
893 }
894 appendTo += result;
895 }
896#endif
897 return appendTo;
898}
899
900U_NAMESPACE_END