]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/listformatter.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / i18n / listformatter.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
51004dcb
A
3/*
4*******************************************************************************
5*
2ca993e8 6* Copyright (C) 2013-2016, International Business Machines
51004dcb
A
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: listformatter.cpp
f3c0d7a5 11* encoding: UTF-8
51004dcb
A
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2012aug27
16* created by: Umesh P. Nair
17*/
18
3d1f044b
A
19#include "cmemory.h"
20#include "unicode/fpositer.h" // FieldPositionIterator
51004dcb 21#include "unicode/listformatter.h"
2ca993e8 22#include "unicode/simpleformatter.h"
3d1f044b 23#include "unicode/ulistformatter.h"
340931cb 24#include "unicode/uscript.h"
3d1f044b 25#include "fphdlimp.h"
51004dcb
A
26#include "mutex.h"
27#include "hash.h"
28#include "cstring.h"
3d1f044b 29#include "uarrsort.h"
51004dcb
A
30#include "ulocimp.h"
31#include "charstr.h"
3d1f044b 32#include "ucln_in.h"
51004dcb 33#include "uresimp.h"
f3c0d7a5 34#include "resource.h"
3d1f044b 35#include "formattedval_impl.h"
51004dcb
A
36
37U_NAMESPACE_BEGIN
38
340931cb
A
39namespace {
40
41class PatternHandler : public UObject {
42public:
43 PatternHandler(const UnicodeString& two, const UnicodeString& end, UErrorCode& errorCode) :
44 twoPattern(two, 2, 2, errorCode),
45 endPattern(end, 2, 2, errorCode) { }
46
47 PatternHandler(const SimpleFormatter& two, const SimpleFormatter& end) :
48 twoPattern(two),
49 endPattern(end) { }
50
51 virtual ~PatternHandler();
52
53 virtual PatternHandler* clone() const { return new PatternHandler(twoPattern, endPattern); }
54
55 virtual const SimpleFormatter& getTwoPattern(const UnicodeString&, const UnicodeString&) const {
56 return twoPattern;
57 }
58
59 virtual const SimpleFormatter& getEndPattern(const UnicodeString&) const {
60 return endPattern;
61 }
62
63protected:
2ca993e8 64 SimpleFormatter twoPattern;
340931cb
A
65 SimpleFormatter endPattern;
66};
67
68PatternHandler::~PatternHandler() {
69}
70
71class ContextualHandler : public PatternHandler {
72public:
73 ContextualHandler(bool (*testFunc)(const UnicodeString& text),
74 const UnicodeString& thenTwo,
75 const UnicodeString& elseTwo,
76 const UnicodeString& thenEnd,
77 const UnicodeString& elseEnd,
78 UErrorCode& errorCode) :
79 PatternHandler(elseTwo, elseEnd, errorCode),
80 test(testFunc),
81 thenTwoPattern(thenTwo, 2, 2, errorCode),
82 thenEndPattern(thenEnd, 2, 2, errorCode) { }
83
84 ContextualHandler(bool (*testFunc)(const UnicodeString& text),
85 const SimpleFormatter& thenTwo, SimpleFormatter elseTwo,
86 const SimpleFormatter& thenEnd, SimpleFormatter elseEnd) :
87 PatternHandler(elseTwo, elseEnd),
88 test(testFunc),
89 thenTwoPattern(thenTwo),
90 thenEndPattern(thenEnd) { }
91
92 ~ContextualHandler() override;
93
94 PatternHandler* clone() const override {
95 return new ContextualHandler(
96 test, thenTwoPattern, twoPattern, thenEndPattern, endPattern);
97 }
98
99 const SimpleFormatter& getTwoPattern(
100 const UnicodeString&, /*ignored*/
101 const UnicodeString& text) const override {
102 return (test)(text) ? thenTwoPattern : twoPattern;
103 }
104
105 const SimpleFormatter& getEndPattern(
106 const UnicodeString& text) const override {
107 return (test)(text) ? thenEndPattern : endPattern;
108 }
109
110private:
111 bool (*test)(const UnicodeString&);
112 SimpleFormatter thenTwoPattern;
113 SimpleFormatter thenEndPattern;
114};
115
116ContextualHandler::~ContextualHandler() {
117}
118
119class ThaiHandler : public PatternHandler {
120public:
121 ThaiHandler(const UnicodeString& two, const UnicodeString& end, UErrorCode& errorCode) :
122 PatternHandler(two, end, errorCode),
123 twoPatternText(two),
124 endPatternText(end),
125 spaceTwoPattern(),
126 twoSpacePattern(),
127 spaceTwoSpacePattern(),
128 spaceEndPattern() {
129 bool needToDeleteSpaceAfter0 = false;
130 UnicodeString tempPattern = two;
131 if (tempPattern.indexOf(UnicodeString(u"{0} ")) < 0) {
132 tempPattern.findAndReplace(UnicodeString(u"{0}"), UnicodeString(u"{0} "));
133 needToDeleteSpaceAfter0 = true;
134 }
135 spaceTwoPattern = SimpleFormatter(tempPattern, 2, 2, errorCode);
136 if (tempPattern.indexOf(UnicodeString(u" {1}")) < 0) {
137 tempPattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(u" {1}"));
138 }
139 spaceTwoSpacePattern = SimpleFormatter(tempPattern, 2, 2, errorCode);
140 if (needToDeleteSpaceAfter0) {
141 tempPattern.findAndReplace(UnicodeString(u"{0} "), UnicodeString(u"{0}"));
142 }
143 twoSpacePattern = SimpleFormatter(tempPattern, 2, 2, errorCode);
144
145 tempPattern = end;
146 if (tempPattern.indexOf(UnicodeString(u" {1}")) < 0) {
147 tempPattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(u" {1}"));
148 }
149 spaceEndPattern = SimpleFormatter(tempPattern, 2, 2, errorCode);
150 }
151
152 ~ThaiHandler() override;
153
154 PatternHandler* clone() const override {
155 UErrorCode dummyErr = U_ZERO_ERROR;
156 return new ThaiHandler(twoPatternText, endPatternText, dummyErr);
157 }
158
159 const SimpleFormatter& getTwoPattern(
160 const UnicodeString& textBefore,
161 const UnicodeString& textAfter) const override {
162 UErrorCode err = U_ZERO_ERROR;
163 bool insertSpaceBefore = !textBefore.isEmpty() && uscript_getScript(textBefore[textBefore.length() - 1], &err) != USCRIPT_THAI;
164 bool insertSpaceAfter = !textAfter.isEmpty() && uscript_getScript(textAfter[0], &err) != USCRIPT_THAI;
165
166 if (insertSpaceBefore) {
167 return insertSpaceAfter ? spaceTwoSpacePattern : spaceTwoPattern;
168 } else {
169 return insertSpaceAfter ? twoSpacePattern : twoPattern;
170 }
171 }
172
173 const SimpleFormatter& getEndPattern(
174 const UnicodeString& text) const override {
175 UErrorCode err = U_ZERO_ERROR;
176 if (!text.isEmpty() && uscript_getScript(text[0], &err) != USCRIPT_THAI) {
177 return spaceEndPattern;
178 } else {
179 return endPattern;
180 }
181 }
182
183private:
184 UnicodeString twoPatternText;
185 UnicodeString endPatternText;
186 SimpleFormatter spaceTwoPattern;
187 SimpleFormatter twoSpacePattern;
188 SimpleFormatter spaceTwoSpacePattern;
189 SimpleFormatter spaceEndPattern;
190};
191
192ThaiHandler::~ThaiHandler() {
193}
194
// Literal list patterns that createPatternHandler() compares against or
// substitutes. The pointers themselves are never reassigned.
static const char16_t * const spanishY = u"{0} y {1}";
static const char16_t * const spanishE = u"{0} e {1}";
static const char16_t * const spanishO = u"{0} o {1}";
static const char16_t * const spanishU = u"{0} u {1}";
static const char16_t * const hebrewVav = u"{0} \u05D5{1}";
static const char16_t * const hebrewVavDash = u"{0} \u05D5-{1}";
201
202// Condiction to change to e.
203// Starts with "hi" or "i" but not with "hie" nor "hia"
204static bool shouldChangeToE(const UnicodeString& text) {
205 int32_t len = text.length();
206 if (len == 0) { return false; }
207 // Case insensitive match hi but not hie nor hia.
208 if ((text[0] == u'h' || text[0] == u'H') &&
209 ((len > 1) && (text[1] == u'i' || text[1] == u'I')) &&
210 ((len == 2) || !(text[2] == u'a' || text[2] == u'A' || text[2] == u'e' || text[2] == u'E'))) {
211 return true;
212 }
213 // Case insensitive for "start with i"
214 if (text[0] == u'i' || text[0] == u'I') { return true; }
215 return false;
216}
217
218// Condiction to change to u.
219// Starts with "o", "ho", and "8". Also "11" by itself.
220// re: ^((o|ho|8).*|11)$
221static bool shouldChangeToU(const UnicodeString& text) {
222 int32_t len = text.length();
223 if (len == 0) { return false; }
224 // Case insensitive match o.* and 8.*
225 if (text[0] == u'o' || text[0] == u'O' || text[0] == u'8') { return true; }
226 // Case insensitive match ho.*
227 if ((text[0] == u'h' || text[0] == u'H') &&
228 ((len > 1) && (text[1] == 'o' || text[1] == u'O'))) {
229 return true;
230 }
231 // match "^11$" and "^11 .*"
232 if ((len >= 2) && text[0] == u'1' && text[1] == u'1' && (len == 2 || text[2] == u' ')) { return true; }
233 return false;
234}
235
236// Condiction to change to VAV follow by a dash.
237// Starts with non Hebrew letter.
238static bool shouldChangeToVavDash(const UnicodeString& text) {
239 if (text.isEmpty()) { return false; }
240 UErrorCode status = U_ZERO_ERROR;
241 return uscript_getScript(text.char32At(0), &status) != USCRIPT_HEBREW;
242}
243
244PatternHandler* createPatternHandler(
245 const char* lang, const UnicodeString& two, const UnicodeString& end,
246 UErrorCode& status) {
247 if (uprv_strcmp(lang, "es") == 0) {
248 // Spanish
249 UnicodeString spanishYStr(TRUE, spanishY, -1);
250 bool twoIsY = two == spanishYStr;
251 bool endIsY = end == spanishYStr;
252 if (twoIsY || endIsY) {
253 UnicodeString replacement(TRUE, spanishE, -1);
254 return new ContextualHandler(
255 shouldChangeToE,
256 twoIsY ? replacement : two, two,
257 endIsY ? replacement : end, end, status);
258 }
259 UnicodeString spanishOStr(TRUE, spanishO, -1);
260 bool twoIsO = two == spanishOStr;
261 bool endIsO = end == spanishOStr;
262 if (twoIsO || endIsO) {
263 UnicodeString replacement(TRUE, spanishU, -1);
264 return new ContextualHandler(
265 shouldChangeToU,
266 twoIsO ? replacement : two, two,
267 endIsO ? replacement : end, end, status);
268 }
269 } else if (uprv_strcmp(lang, "he") == 0 || uprv_strcmp(lang, "iw") == 0) {
270 // Hebrew
271 UnicodeString hebrewVavStr(TRUE, hebrewVav, -1);
272 bool twoIsVav = two == hebrewVavStr;
273 bool endIsVav = end == hebrewVavStr;
274 if (twoIsVav || endIsVav) {
275 UnicodeString replacement(TRUE, hebrewVavDash, -1);
276 return new ContextualHandler(
277 shouldChangeToVavDash,
278 twoIsVav ? replacement : two, two,
279 endIsVav ? replacement : end, end, status);
280 }
281 } else if (uprv_strcmp(lang, "th") == 0) {
282 return new ThaiHandler(two, end, status);
283 }
284 return new PatternHandler(two, end, status);
285}
286
287} // namespace
288
289struct ListFormatInternal : public UMemory {
2ca993e8
A
290 SimpleFormatter startPattern;
291 SimpleFormatter middlePattern;
340931cb 292 LocalPointer<PatternHandler> patternHandler;
57a6839d
A
293
294ListFormatInternal(
295 const UnicodeString& two,
296 const UnicodeString& start,
297 const UnicodeString& middle,
2ca993e8 298 const UnicodeString& end,
340931cb 299 const Locale& locale,
2ca993e8 300 UErrorCode &errorCode) :
2ca993e8
A
301 startPattern(start, 2, 2, errorCode),
302 middlePattern(middle, 2, 2, errorCode),
340931cb 303 patternHandler(createPatternHandler(locale.getLanguage(), two, end, errorCode), errorCode) { }
2ca993e8
A
304
305ListFormatInternal(const ListFormatData &data, UErrorCode &errorCode) :
2ca993e8
A
306 startPattern(data.startPattern, errorCode),
307 middlePattern(data.middlePattern, errorCode),
340931cb
A
308 patternHandler(createPatternHandler(
309 data.locale.getLanguage(), data.twoPattern, data.endPattern, errorCode), errorCode) { }
57a6839d
A
310
311ListFormatInternal(const ListFormatInternal &other) :
57a6839d
A
312 startPattern(other.startPattern),
313 middlePattern(other.middlePattern),
340931cb 314 patternHandler(other.patternHandler->clone()) { }
57a6839d
A
315};
316
317
3d1f044b
A
318#if !UCONFIG_NO_FORMATTING
319class FormattedListData : public FormattedValueFieldPositionIteratorImpl {
320public:
321 FormattedListData(UErrorCode& status) : FormattedValueFieldPositionIteratorImpl(5, status) {}
322 virtual ~FormattedListData();
323};
324
325FormattedListData::~FormattedListData() = default;
326
327UPRV_FORMATTED_VALUE_SUBCLASS_AUTO_IMPL(FormattedList)
328#endif
329
57a6839d 330
3d1f044b 331static Hashtable* listPatternHash = nullptr;
51004dcb
A
332
333U_CDECL_BEGIN
334static UBool U_CALLCONV uprv_listformatter_cleanup() {
335 delete listPatternHash;
3d1f044b 336 listPatternHash = nullptr;
51004dcb
A
337 return TRUE;
338}
339
340static void U_CALLCONV
57a6839d
A
341uprv_deleteListFormatInternal(void *obj) {
342 delete static_cast<ListFormatInternal *>(obj);
51004dcb
A
343}
344
345U_CDECL_END
346
57a6839d
A
347ListFormatter::ListFormatter(const ListFormatter& other) :
348 owned(other.owned), data(other.data) {
3d1f044b 349 if (other.owned != nullptr) {
57a6839d
A
350 owned = new ListFormatInternal(*other.owned);
351 data = owned;
352 }
353}
354
355ListFormatter& ListFormatter::operator=(const ListFormatter& other) {
356 if (this == &other) {
357 return *this;
358 }
359 delete owned;
360 if (other.owned) {
361 owned = new ListFormatInternal(*other.owned);
362 data = owned;
363 } else {
3d1f044b 364 owned = nullptr;
57a6839d
A
365 data = other.data;
366 }
367 return *this;
368}
51004dcb
A
369
370void ListFormatter::initializeHash(UErrorCode& errorCode) {
371 if (U_FAILURE(errorCode)) {
372 return;
373 }
374
375 listPatternHash = new Hashtable();
3d1f044b 376 if (listPatternHash == nullptr) {
51004dcb
A
377 errorCode = U_MEMORY_ALLOCATION_ERROR;
378 return;
379 }
380
57a6839d 381 listPatternHash->setValueDeleter(uprv_deleteListFormatInternal);
3d1f044b 382 ucln_i18n_registerCleanup(UCLN_I18N_LIST_FORMATTER, uprv_listformatter_cleanup);
51004dcb
A
383
384}
385
57a6839d
A
386const ListFormatInternal* ListFormatter::getListFormatInternal(
387 const Locale& locale, const char *style, UErrorCode& errorCode) {
51004dcb 388 if (U_FAILURE(errorCode)) {
3d1f044b 389 return nullptr;
51004dcb 390 }
57a6839d
A
391 CharString keyBuffer(locale.getName(), errorCode);
392 keyBuffer.append(':', errorCode).append(style, errorCode);
393 UnicodeString key(keyBuffer.data(), -1, US_INV);
3d1f044b 394 ListFormatInternal* result = nullptr;
340931cb 395 static UMutex listFormatterMutex;
51004dcb 396 {
340931cb 397 Mutex m(&listFormatterMutex);
3d1f044b 398 if (listPatternHash == nullptr) {
51004dcb
A
399 initializeHash(errorCode);
400 if (U_FAILURE(errorCode)) {
3d1f044b 401 return nullptr;
51004dcb
A
402 }
403 }
57a6839d 404 result = static_cast<ListFormatInternal*>(listPatternHash->get(key));
51004dcb 405 }
3d1f044b 406 if (result != nullptr) {
51004dcb
A
407 return result;
408 }
57a6839d 409 result = loadListFormatInternal(locale, style, errorCode);
51004dcb 410 if (U_FAILURE(errorCode)) {
3d1f044b 411 return nullptr;
51004dcb
A
412 }
413
414 {
340931cb 415 Mutex m(&listFormatterMutex);
57a6839d 416 ListFormatInternal* temp = static_cast<ListFormatInternal*>(listPatternHash->get(key));
3d1f044b 417 if (temp != nullptr) {
51004dcb
A
418 delete result;
419 result = temp;
420 } else {
421 listPatternHash->put(key, result, errorCode);
422 if (U_FAILURE(errorCode)) {
3d1f044b 423 return nullptr;
51004dcb
A
424 }
425 }
426 }
427 return result;
428}
429
340931cb
A
430#if !UCONFIG_NO_FORMATTING
431static const char* typeWidthToStyleString(UListFormatterType type, UListFormatterWidth width) {
432 switch (type) {
433 case ULISTFMT_TYPE_AND:
434 switch (width) {
435 case ULISTFMT_WIDTH_WIDE:
436 return "standard";
437 case ULISTFMT_WIDTH_SHORT:
438 return "standard-short";
439 case ULISTFMT_WIDTH_NARROW:
440 return "standard-narrow";
441 default:
442 return nullptr;
443 }
444 break;
445
446 case ULISTFMT_TYPE_OR:
447 switch (width) {
448 case ULISTFMT_WIDTH_WIDE:
449 return "or";
450 case ULISTFMT_WIDTH_SHORT:
451 return "or-short";
452 case ULISTFMT_WIDTH_NARROW:
453 return "or-narrow";
454 default:
455 return nullptr;
456 }
457 break;
458
459 case ULISTFMT_TYPE_UNITS:
460 switch (width) {
461 case ULISTFMT_WIDTH_WIDE:
462 return "unit";
463 case ULISTFMT_WIDTH_SHORT:
464 return "unit-short";
465 case ULISTFMT_WIDTH_NARROW:
466 return "unit-narrow";
467 default:
468 return nullptr;
469 }
470 }
471
472 return nullptr;
473}
474#endif
475
f3c0d7a5
A
476static const UChar solidus = 0x2F;
477static const UChar aliasPrefix[] = { 0x6C,0x69,0x73,0x74,0x50,0x61,0x74,0x74,0x65,0x72,0x6E,0x2F }; // "listPattern/"
478enum {
479 kAliasPrefixLen = UPRV_LENGTHOF(aliasPrefix),
480 kStyleLenMax = 24 // longest currently is 14
481};
482
483struct ListFormatter::ListPatternsSink : public ResourceSink {
484 UnicodeString two, start, middle, end;
485#if ((U_PLATFORM == U_PF_AIX) || (U_PLATFORM == U_PF_OS390)) && (U_CPLUSPLUS_VERSION < 11)
486 char aliasedStyle[kStyleLenMax+1];
487 ListPatternsSink() {
488 uprv_memset(aliasedStyle, 0, kStyleLenMax+1);
489 }
490#else
491 char aliasedStyle[kStyleLenMax+1] = {0};
492
493 ListPatternsSink() {}
494#endif
495 virtual ~ListPatternsSink();
496
497 void setAliasedStyle(UnicodeString alias) {
498 int32_t startIndex = alias.indexOf(aliasPrefix, kAliasPrefixLen, 0);
499 if (startIndex < 0) {
500 return;
501 }
502 startIndex += kAliasPrefixLen;
503 int32_t endIndex = alias.indexOf(solidus, startIndex);
504 if (endIndex < 0) {
505 endIndex = alias.length();
506 }
507 alias.extract(startIndex, endIndex-startIndex, aliasedStyle, kStyleLenMax+1, US_INV);
508 aliasedStyle[kStyleLenMax] = 0;
509 }
510
511 void handleValueForPattern(ResourceValue &value, UnicodeString &pattern, UErrorCode &errorCode) {
512 if (pattern.isEmpty()) {
513 if (value.getType() == URES_ALIAS) {
514 if (aliasedStyle[0] == 0) {
515 setAliasedStyle(value.getAliasUnicodeString(errorCode));
516 }
517 } else {
518 pattern = value.getUnicodeString(errorCode);
519 }
520 }
521 }
522
523 virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/,
524 UErrorCode &errorCode) {
525 aliasedStyle[0] = 0;
526 if (value.getType() == URES_ALIAS) {
527 setAliasedStyle(value.getAliasUnicodeString(errorCode));
528 return;
529 }
530 ResourceTable listPatterns = value.getTable(errorCode);
531 for (int i = 0; U_SUCCESS(errorCode) && listPatterns.getKeyAndValue(i, key, value); ++i) {
532 if (uprv_strcmp(key, "2") == 0) {
533 handleValueForPattern(value, two, errorCode);
534 } else if (uprv_strcmp(key, "end") == 0) {
535 handleValueForPattern(value, end, errorCode);
536 } else if (uprv_strcmp(key, "middle") == 0) {
537 handleValueForPattern(value, middle, errorCode);
538 } else if (uprv_strcmp(key, "start") == 0) {
539 handleValueForPattern(value, start, errorCode);
540 }
541 }
542 }
543};
544
545// Virtual destructors must be defined out of line.
546ListFormatter::ListPatternsSink::~ListPatternsSink() {}
547
548ListFormatInternal* ListFormatter::loadListFormatInternal(
57a6839d 549 const Locale& locale, const char * style, UErrorCode& errorCode) {
3d1f044b 550 UResourceBundle* rb = ures_open(nullptr, locale.getName(), &errorCode);
f3c0d7a5 551 rb = ures_getByKeyWithFallback(rb, "listPattern", rb, &errorCode);
51004dcb
A
552 if (U_FAILURE(errorCode)) {
553 ures_close(rb);
3d1f044b 554 return nullptr;
51004dcb 555 }
f3c0d7a5
A
556 ListFormatter::ListPatternsSink sink;
557 char currentStyle[kStyleLenMax+1];
558 uprv_strncpy(currentStyle, style, kStyleLenMax);
559 currentStyle[kStyleLenMax] = 0;
57a6839d 560
f3c0d7a5
A
561 for (;;) {
562 ures_getAllItemsWithFallback(rb, currentStyle, sink, errorCode);
563 if (U_FAILURE(errorCode) || sink.aliasedStyle[0] == 0 || uprv_strcmp(currentStyle, sink.aliasedStyle) == 0) {
564 break;
565 }
566 uprv_strcpy(currentStyle, sink.aliasedStyle);
51004dcb 567 }
51004dcb
A
568 ures_close(rb);
569 if (U_FAILURE(errorCode)) {
3d1f044b 570 return nullptr;
51004dcb 571 }
f3c0d7a5
A
572 if (sink.two.isEmpty() || sink.start.isEmpty() || sink.middle.isEmpty() || sink.end.isEmpty()) {
573 errorCode = U_MISSING_RESOURCE_ERROR;
3d1f044b 574 return nullptr;
f3c0d7a5 575 }
340931cb
A
576
577 ListFormatInternal* result = new ListFormatInternal(sink.two, sink.start, sink.middle, sink.end, locale, errorCode);
3d1f044b 578 if (result == nullptr) {
51004dcb 579 errorCode = U_MEMORY_ALLOCATION_ERROR;
3d1f044b 580 return nullptr;
51004dcb 581 }
2ca993e8
A
582 if (U_FAILURE(errorCode)) {
583 delete result;
3d1f044b 584 return nullptr;
2ca993e8 585 }
51004dcb
A
586 return result;
587}
588
51004dcb
A
589ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) {
590 Locale locale; // The default locale.
591 return createInstance(locale, errorCode);
592}
593
594ListFormatter* ListFormatter::createInstance(const Locale& locale, UErrorCode& errorCode) {
340931cb
A
595#if !UCONFIG_NO_FORMATTING
596 return createInstance(locale, ULISTFMT_TYPE_AND, ULISTFMT_WIDTH_WIDE, errorCode);
597#else
598 return createInstance(locale, "standard", errorCode);
599#endif
600}
601
602#if !UCONFIG_NO_FORMATTING
603ListFormatter* ListFormatter::createInstance(
604 const Locale& locale, UListFormatterType type, UListFormatterWidth width, UErrorCode& errorCode) {
605 const char* style = typeWidthToStyleString(type, width);
606 if (style == nullptr) {
607 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
608 return nullptr;
609 }
610 return createInstance(locale, style, errorCode);
57a6839d 611}
340931cb 612#endif
57a6839d
A
613
614ListFormatter* ListFormatter::createInstance(const Locale& locale, const char *style, UErrorCode& errorCode) {
3d1f044b 615 const ListFormatInternal* listFormatInternal = getListFormatInternal(locale, style, errorCode);
51004dcb 616 if (U_FAILURE(errorCode)) {
3d1f044b 617 return nullptr;
51004dcb 618 }
57a6839d 619 ListFormatter* p = new ListFormatter(listFormatInternal);
3d1f044b 620 if (p == nullptr) {
51004dcb 621 errorCode = U_MEMORY_ALLOCATION_ERROR;
3d1f044b 622 return nullptr;
51004dcb
A
623 }
624 return p;
625}
626
2ca993e8
A
627ListFormatter::ListFormatter(const ListFormatData& listFormatData, UErrorCode &errorCode) {
628 owned = new ListFormatInternal(listFormatData, errorCode);
57a6839d 629 data = owned;
51004dcb
A
630}
631
3d1f044b 632ListFormatter::ListFormatter(const ListFormatInternal* listFormatterInternal) : owned(nullptr), data(listFormatterInternal) {
57a6839d 633}
51004dcb 634
57a6839d
A
635ListFormatter::~ListFormatter() {
636 delete owned;
51004dcb
A
637}
638
639/**
57a6839d
A
640 * Joins first and second using the pattern pat.
641 * On entry offset is an offset into first or -1 if offset unspecified.
642 * On exit offset is offset of second in result if recordOffset was set
643 * Otherwise if it was >=0 it is set to point into result where it used
b331163b
A
644 * to point into first. On exit, result is the join of first and second
645 * according to pat. Any previous value of result gets replaced.
51004dcb 646 */
b331163b 647static void joinStringsAndReplace(
2ca993e8 648 const SimpleFormatter& pat,
57a6839d
A
649 const UnicodeString& first,
650 const UnicodeString& second,
651 UnicodeString &result,
652 UBool recordOffset,
653 int32_t &offset,
3d1f044b
A
654 int32_t *offsetFirst,
655 int32_t *offsetSecond,
57a6839d 656 UErrorCode& errorCode) {
51004dcb
A
657 if (U_FAILURE(errorCode)) {
658 return;
659 }
57a6839d
A
660 const UnicodeString *params[2] = {&first, &second};
661 int32_t offsets[2];
b331163b 662 pat.formatAndReplace(
57a6839d 663 params,
b331163b 664 UPRV_LENGTHOF(params),
57a6839d
A
665 result,
666 offsets,
b331163b 667 UPRV_LENGTHOF(offsets),
57a6839d
A
668 errorCode);
669 if (U_FAILURE(errorCode)) {
51004dcb
A
670 return;
671 }
57a6839d
A
672 if (offsets[0] == -1 || offsets[1] == -1) {
673 errorCode = U_INVALID_FORMAT_ERROR;
51004dcb
A
674 return;
675 }
57a6839d
A
676 if (recordOffset) {
677 offset = offsets[1];
678 } else if (offset >= 0) {
679 offset += offsets[0];
680 }
3d1f044b
A
681 if (offsetFirst != nullptr) *offsetFirst = offsets[0];
682 if (offsetSecond != nullptr) *offsetSecond = offsets[1];
57a6839d 683}
51004dcb 684
57a6839d
A
685UnicodeString& ListFormatter::format(
686 const UnicodeString items[],
687 int32_t nItems,
688 UnicodeString& appendTo,
689 UErrorCode& errorCode) const {
690 int32_t offset;
691 return format(items, nItems, appendTo, -1, offset, errorCode);
692}
51004dcb 693
3d1f044b
A
694#if !UCONFIG_NO_FORMATTING
695UnicodeString& ListFormatter::format(
696 const UnicodeString items[],
697 int32_t nItems,
698 UnicodeString & appendTo,
699 FieldPositionIterator* posIter,
700 UErrorCode& errorCode) const {
701 int32_t offset;
702 FieldPositionIteratorHandler handler(posIter, errorCode);
703 return format_(items, nItems, appendTo, -1, offset, &handler, errorCode);
704}
705#endif
706
57a6839d
A
707UnicodeString& ListFormatter::format(
708 const UnicodeString items[],
709 int32_t nItems,
710 UnicodeString& appendTo,
711 int32_t index,
712 int32_t &offset,
713 UErrorCode& errorCode) const {
3d1f044b
A
714 return format_(items, nItems, appendTo, index, offset, nullptr, errorCode);
715}
716
717#if !UCONFIG_NO_FORMATTING
718FormattedList ListFormatter::formatStringsToValue(
719 const UnicodeString items[],
720 int32_t nItems,
721 UErrorCode& errorCode) const {
722 LocalPointer<FormattedListData> result(new FormattedListData(errorCode), errorCode);
723 if (U_FAILURE(errorCode)) {
724 return FormattedList(errorCode);
725 }
726 UnicodeString string;
727 int32_t offset;
728 auto handler = result->getHandler(errorCode);
729 handler.setCategory(UFIELD_CATEGORY_LIST);
730 format_(items, nItems, string, -1, offset, &handler, errorCode);
731 handler.getError(errorCode);
732 result->appendString(string, errorCode);
733 if (U_FAILURE(errorCode)) {
734 return FormattedList(errorCode);
735 }
736
737 // Add span fields and sort
738 ConstrainedFieldPosition cfpos;
739 cfpos.constrainField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD);
740 int32_t i = 0;
741 handler.setCategory(UFIELD_CATEGORY_LIST_SPAN);
742 while (result->nextPosition(cfpos, errorCode)) {
743 handler.addAttribute(i++, cfpos.getStart(), cfpos.getLimit());
744 }
745 handler.getError(errorCode);
746 if (U_FAILURE(errorCode)) {
747 return FormattedList(errorCode);
748 }
749 result->sort();
750
751 return FormattedList(result.orphan());
752}
753#endif
754
755UnicodeString& ListFormatter::format_(
756 const UnicodeString items[],
757 int32_t nItems,
758 UnicodeString& appendTo,
759 int32_t index,
760 int32_t &offset,
761 FieldPositionHandler* handler,
762 UErrorCode& errorCode) const {
763#if !UCONFIG_NO_FORMATTING
57a6839d
A
764 offset = -1;
765 if (U_FAILURE(errorCode)) {
766 return appendTo;
767 }
3d1f044b 768 if (data == nullptr) {
57a6839d
A
769 errorCode = U_INVALID_STATE_ERROR;
770 return appendTo;
51004dcb
A
771 }
772
57a6839d
A
773 if (nItems <= 0) {
774 return appendTo;
775 }
776 if (nItems == 1) {
777 if (index == 0) {
778 offset = appendTo.length();
779 }
3d1f044b
A
780 if (handler != nullptr) {
781 handler->addAttribute(ULISTFMT_ELEMENT_FIELD,
782 appendTo.length(),
783 appendTo.length() + items[0].length());
784 }
57a6839d
A
785 appendTo.append(items[0]);
786 return appendTo;
787 }
b331163b 788 UnicodeString result(items[0]);
57a6839d
A
789 if (index == 0) {
790 offset = 0;
791 }
340931cb
A
792 int32_t offsetFirst = 0;
793 int32_t offsetSecond = 0;
3d1f044b
A
794 int32_t prefixLength = 0;
795 // for n items, there are 2 * (n + 1) boundary including 0 and the upper
796 // edge.
797 MaybeStackArray<int32_t, 10> offsets((handler != nullptr) ? 2 * (nItems + 1): 0);
340931cb
A
798 if (nItems == 2) {
799 joinStringsAndReplace(
800 data->patternHandler->getTwoPattern(items[0], items[1]),
801 result,
802 items[1],
803 result,
804 index == 1,
805 offset,
806 &offsetFirst,
807 &offsetSecond,
808 errorCode);
809 } else {
810 joinStringsAndReplace(
811 data->startPattern,
812 result,
813 items[1],
814 result,
815 index == 1,
816 offset,
817 &offsetFirst,
818 &offsetSecond,
819 errorCode);
820 }
3d1f044b
A
821 if (handler != nullptr) {
822 offsets[0] = 0;
823 prefixLength += offsetFirst;
824 offsets[1] = offsetSecond - prefixLength;
825 }
b331163b
A
826 if (nItems > 2) {
827 for (int32_t i = 2; i < nItems - 1; ++i) {
828 joinStringsAndReplace(
829 data->middlePattern,
830 result,
831 items[i],
832 result,
833 index == i,
834 offset,
3d1f044b
A
835 &offsetFirst,
836 &offsetSecond,
b331163b 837 errorCode);
3d1f044b
A
838 if (handler != nullptr) {
839 prefixLength += offsetFirst;
840 offsets[i] = offsetSecond - prefixLength;
841 }
b331163b
A
842 }
843 joinStringsAndReplace(
340931cb 844 data->patternHandler->getEndPattern(items[nItems - 1]),
b331163b
A
845 result,
846 items[nItems - 1],
847 result,
848 index == nItems - 1,
849 offset,
3d1f044b
A
850 &offsetFirst,
851 &offsetSecond,
b331163b 852 errorCode);
3d1f044b
A
853 if (handler != nullptr) {
854 prefixLength += offsetFirst;
855 offsets[nItems - 1] = offsetSecond - prefixLength;
856 }
857 }
858 if (handler != nullptr) {
859 // If there are already some data in appendTo, we need to adjust the index
860 // by shifting that lenght while insert into handler.
861 int32_t shift = appendTo.length() + prefixLength;
862 // Output the ULISTFMT_ELEMENT_FIELD in the order of the input elements
863 for (int32_t i = 0; i < nItems; ++i) {
864 offsets[i + nItems] = offsets[i] + items[i].length() + shift;
865 offsets[i] += shift;
866 handler->addAttribute(
867 ULISTFMT_ELEMENT_FIELD, // id
868 offsets[i], // index
869 offsets[i + nItems]); // limit
870 }
871 // The locale pattern may reorder the items (such as in ur-IN locale),
872 // so we cannot assume the array is in accendning order.
873 // To handle the edging case, just insert the two ends into the array
874 // and sort. Then we output ULISTFMT_LITERAL_FIELD if the indecies
875 // between the even and odd position are not the same in the sorted array.
876 offsets[2 * nItems] = shift - prefixLength;
877 offsets[2 * nItems + 1] = result.length() + shift - prefixLength;
878 uprv_sortArray(offsets.getAlias(), 2 * (nItems + 1), sizeof(int32_t),
879 uprv_int32Comparator, nullptr,
880 false, &errorCode);
881 for (int32_t i = 0; i <= nItems; ++i) {
882 if (offsets[i * 2] != offsets[i * 2 + 1]) {
883 handler->addAttribute(
884 ULISTFMT_LITERAL_FIELD, // id
885 offsets[i * 2], // index
886 offsets[i * 2 + 1]); // limit
887 }
888 }
57a6839d 889 }
57a6839d
A
890 if (U_SUCCESS(errorCode)) {
891 if (offset >= 0) {
892 offset += appendTo.length();
893 }
b331163b 894 appendTo += result;
57a6839d 895 }
3d1f044b 896#endif
57a6839d 897 return appendTo;
51004dcb
A
898}
899
900U_NAMESPACE_END