ICU-64243.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / listformatter.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2013-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: listformatter.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2012aug27
16 * created by: Umesh P. Nair
17 */
18
19 #include "cmemory.h"
20 #include "unicode/fpositer.h" // FieldPositionIterator
21 #include "unicode/listformatter.h"
22 #include "unicode/simpleformatter.h"
23 #include "unicode/ulistformatter.h"
24 #include "fphdlimp.h"
25 #include "mutex.h"
26 #include "hash.h"
27 #include "cstring.h"
28 #include "uarrsort.h"
29 #include "ulocimp.h"
30 #include "charstr.h"
31 #include "ucln_in.h"
32 #include "uresimp.h"
33 #include "resource.h"
34 #include "formattedval_impl.h"
35
36 U_NAMESPACE_BEGIN
37
38 struct ListFormatInternal : public UMemory {
39 SimpleFormatter twoPattern;
40 SimpleFormatter startPattern;
41 SimpleFormatter middlePattern;
42 SimpleFormatter endPattern;
43
44 ListFormatInternal(
45 const UnicodeString& two,
46 const UnicodeString& start,
47 const UnicodeString& middle,
48 const UnicodeString& end,
49 UErrorCode &errorCode) :
50 twoPattern(two, 2, 2, errorCode),
51 startPattern(start, 2, 2, errorCode),
52 middlePattern(middle, 2, 2, errorCode),
53 endPattern(end, 2, 2, errorCode) {}
54
55 ListFormatInternal(const ListFormatData &data, UErrorCode &errorCode) :
56 twoPattern(data.twoPattern, errorCode),
57 startPattern(data.startPattern, errorCode),
58 middlePattern(data.middlePattern, errorCode),
59 endPattern(data.endPattern, errorCode) { }
60
61 ListFormatInternal(const ListFormatInternal &other) :
62 twoPattern(other.twoPattern),
63 startPattern(other.startPattern),
64 middlePattern(other.middlePattern),
65 endPattern(other.endPattern) { }
66 };
67
68
69 #if !UCONFIG_NO_FORMATTING
70 class FormattedListData : public FormattedValueFieldPositionIteratorImpl {
71 public:
72 FormattedListData(UErrorCode& status) : FormattedValueFieldPositionIteratorImpl(5, status) {}
73 virtual ~FormattedListData();
74 };
75
76 FormattedListData::~FormattedListData() = default;
77
78 UPRV_FORMATTED_VALUE_SUBCLASS_AUTO_IMPL(FormattedList)
79 #endif
80
81
82 static Hashtable* listPatternHash = nullptr;
83 static const char STANDARD_STYLE[] = "standard";
84
85 U_CDECL_BEGIN
86 static UBool U_CALLCONV uprv_listformatter_cleanup() {
87 delete listPatternHash;
88 listPatternHash = nullptr;
89 return TRUE;
90 }
91
92 static void U_CALLCONV
93 uprv_deleteListFormatInternal(void *obj) {
94 delete static_cast<ListFormatInternal *>(obj);
95 }
96
97 U_CDECL_END
98
99 ListFormatter::ListFormatter(const ListFormatter& other) :
100 owned(other.owned), data(other.data) {
101 if (other.owned != nullptr) {
102 owned = new ListFormatInternal(*other.owned);
103 data = owned;
104 }
105 }
106
107 ListFormatter& ListFormatter::operator=(const ListFormatter& other) {
108 if (this == &other) {
109 return *this;
110 }
111 delete owned;
112 if (other.owned) {
113 owned = new ListFormatInternal(*other.owned);
114 data = owned;
115 } else {
116 owned = nullptr;
117 data = other.data;
118 }
119 return *this;
120 }
121
122 void ListFormatter::initializeHash(UErrorCode& errorCode) {
123 if (U_FAILURE(errorCode)) {
124 return;
125 }
126
127 listPatternHash = new Hashtable();
128 if (listPatternHash == nullptr) {
129 errorCode = U_MEMORY_ALLOCATION_ERROR;
130 return;
131 }
132
133 listPatternHash->setValueDeleter(uprv_deleteListFormatInternal);
134 ucln_i18n_registerCleanup(UCLN_I18N_LIST_FORMATTER, uprv_listformatter_cleanup);
135
136 }
137
138 const ListFormatInternal* ListFormatter::getListFormatInternal(
139 const Locale& locale, const char *style, UErrorCode& errorCode) {
140 if (U_FAILURE(errorCode)) {
141 return nullptr;
142 }
143 CharString keyBuffer(locale.getName(), errorCode);
144 keyBuffer.append(':', errorCode).append(style, errorCode);
145 UnicodeString key(keyBuffer.data(), -1, US_INV);
146 ListFormatInternal* result = nullptr;
147 static UMutex *listFormatterMutex = STATIC_NEW(UMutex);
148 {
149 Mutex m(listFormatterMutex);
150 if (listPatternHash == nullptr) {
151 initializeHash(errorCode);
152 if (U_FAILURE(errorCode)) {
153 return nullptr;
154 }
155 }
156 result = static_cast<ListFormatInternal*>(listPatternHash->get(key));
157 }
158 if (result != nullptr) {
159 return result;
160 }
161 result = loadListFormatInternal(locale, style, errorCode);
162 if (U_FAILURE(errorCode)) {
163 return nullptr;
164 }
165
166 {
167 Mutex m(listFormatterMutex);
168 ListFormatInternal* temp = static_cast<ListFormatInternal*>(listPatternHash->get(key));
169 if (temp != nullptr) {
170 delete result;
171 result = temp;
172 } else {
173 listPatternHash->put(key, result, errorCode);
174 if (U_FAILURE(errorCode)) {
175 return nullptr;
176 }
177 }
178 }
179 return result;
180 }
181
182 static const UChar solidus = 0x2F;
183 static const UChar aliasPrefix[] = { 0x6C,0x69,0x73,0x74,0x50,0x61,0x74,0x74,0x65,0x72,0x6E,0x2F }; // "listPattern/"
184 enum {
185 kAliasPrefixLen = UPRV_LENGTHOF(aliasPrefix),
186 kStyleLenMax = 24 // longest currently is 14
187 };
188
189 struct ListFormatter::ListPatternsSink : public ResourceSink {
190 UnicodeString two, start, middle, end;
191 #if ((U_PLATFORM == U_PF_AIX) || (U_PLATFORM == U_PF_OS390)) && (U_CPLUSPLUS_VERSION < 11)
192 char aliasedStyle[kStyleLenMax+1];
193 ListPatternsSink() {
194 uprv_memset(aliasedStyle, 0, kStyleLenMax+1);
195 }
196 #else
197 char aliasedStyle[kStyleLenMax+1] = {0};
198
199 ListPatternsSink() {}
200 #endif
201 virtual ~ListPatternsSink();
202
203 void setAliasedStyle(UnicodeString alias) {
204 int32_t startIndex = alias.indexOf(aliasPrefix, kAliasPrefixLen, 0);
205 if (startIndex < 0) {
206 return;
207 }
208 startIndex += kAliasPrefixLen;
209 int32_t endIndex = alias.indexOf(solidus, startIndex);
210 if (endIndex < 0) {
211 endIndex = alias.length();
212 }
213 alias.extract(startIndex, endIndex-startIndex, aliasedStyle, kStyleLenMax+1, US_INV);
214 aliasedStyle[kStyleLenMax] = 0;
215 }
216
217 void handleValueForPattern(ResourceValue &value, UnicodeString &pattern, UErrorCode &errorCode) {
218 if (pattern.isEmpty()) {
219 if (value.getType() == URES_ALIAS) {
220 if (aliasedStyle[0] == 0) {
221 setAliasedStyle(value.getAliasUnicodeString(errorCode));
222 }
223 } else {
224 pattern = value.getUnicodeString(errorCode);
225 }
226 }
227 }
228
229 virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/,
230 UErrorCode &errorCode) {
231 aliasedStyle[0] = 0;
232 if (value.getType() == URES_ALIAS) {
233 setAliasedStyle(value.getAliasUnicodeString(errorCode));
234 return;
235 }
236 ResourceTable listPatterns = value.getTable(errorCode);
237 for (int i = 0; U_SUCCESS(errorCode) && listPatterns.getKeyAndValue(i, key, value); ++i) {
238 if (uprv_strcmp(key, "2") == 0) {
239 handleValueForPattern(value, two, errorCode);
240 } else if (uprv_strcmp(key, "end") == 0) {
241 handleValueForPattern(value, end, errorCode);
242 } else if (uprv_strcmp(key, "middle") == 0) {
243 handleValueForPattern(value, middle, errorCode);
244 } else if (uprv_strcmp(key, "start") == 0) {
245 handleValueForPattern(value, start, errorCode);
246 }
247 }
248 }
249 };
250
251 // Virtual destructors must be defined out of line.
252 ListFormatter::ListPatternsSink::~ListPatternsSink() {}
253
254 ListFormatInternal* ListFormatter::loadListFormatInternal(
255 const Locale& locale, const char * style, UErrorCode& errorCode) {
256 UResourceBundle* rb = ures_open(nullptr, locale.getName(), &errorCode);
257 rb = ures_getByKeyWithFallback(rb, "listPattern", rb, &errorCode);
258 if (U_FAILURE(errorCode)) {
259 ures_close(rb);
260 return nullptr;
261 }
262 ListFormatter::ListPatternsSink sink;
263 char currentStyle[kStyleLenMax+1];
264 uprv_strncpy(currentStyle, style, kStyleLenMax);
265 currentStyle[kStyleLenMax] = 0;
266
267 for (;;) {
268 ures_getAllItemsWithFallback(rb, currentStyle, sink, errorCode);
269 if (U_FAILURE(errorCode) || sink.aliasedStyle[0] == 0 || uprv_strcmp(currentStyle, sink.aliasedStyle) == 0) {
270 break;
271 }
272 uprv_strcpy(currentStyle, sink.aliasedStyle);
273 }
274 ures_close(rb);
275 if (U_FAILURE(errorCode)) {
276 return nullptr;
277 }
278 if (sink.two.isEmpty() || sink.start.isEmpty() || sink.middle.isEmpty() || sink.end.isEmpty()) {
279 errorCode = U_MISSING_RESOURCE_ERROR;
280 return nullptr;
281 }
282 ListFormatInternal* result = new ListFormatInternal(sink.two, sink.start, sink.middle, sink.end, errorCode);
283 if (result == nullptr) {
284 errorCode = U_MEMORY_ALLOCATION_ERROR;
285 return nullptr;
286 }
287 if (U_FAILURE(errorCode)) {
288 delete result;
289 return nullptr;
290 }
291 return result;
292 }
293
294 ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) {
295 Locale locale; // The default locale.
296 return createInstance(locale, errorCode);
297 }
298
299 ListFormatter* ListFormatter::createInstance(const Locale& locale, UErrorCode& errorCode) {
300 return createInstance(locale, STANDARD_STYLE, errorCode);
301 }
302
303 ListFormatter* ListFormatter::createInstance(const Locale& locale, const char *style, UErrorCode& errorCode) {
304 const ListFormatInternal* listFormatInternal = getListFormatInternal(locale, style, errorCode);
305 if (U_FAILURE(errorCode)) {
306 return nullptr;
307 }
308 ListFormatter* p = new ListFormatter(listFormatInternal);
309 if (p == nullptr) {
310 errorCode = U_MEMORY_ALLOCATION_ERROR;
311 return nullptr;
312 }
313 return p;
314 }
315
316 ListFormatter::ListFormatter(const ListFormatData& listFormatData, UErrorCode &errorCode) {
317 owned = new ListFormatInternal(listFormatData, errorCode);
318 data = owned;
319 }
320
321 ListFormatter::ListFormatter(const ListFormatInternal* listFormatterInternal) : owned(nullptr), data(listFormatterInternal) {
322 }
323
324 ListFormatter::~ListFormatter() {
325 delete owned;
326 }
327
328 /**
329 * Joins first and second using the pattern pat.
330 * On entry offset is an offset into first or -1 if offset unspecified.
331 * On exit offset is offset of second in result if recordOffset was set
332 * Otherwise if it was >=0 it is set to point into result where it used
333 * to point into first. On exit, result is the join of first and second
334 * according to pat. Any previous value of result gets replaced.
335 */
336 static void joinStringsAndReplace(
337 const SimpleFormatter& pat,
338 const UnicodeString& first,
339 const UnicodeString& second,
340 UnicodeString &result,
341 UBool recordOffset,
342 int32_t &offset,
343 int32_t *offsetFirst,
344 int32_t *offsetSecond,
345 UErrorCode& errorCode) {
346 if (U_FAILURE(errorCode)) {
347 return;
348 }
349 const UnicodeString *params[2] = {&first, &second};
350 int32_t offsets[2];
351 pat.formatAndReplace(
352 params,
353 UPRV_LENGTHOF(params),
354 result,
355 offsets,
356 UPRV_LENGTHOF(offsets),
357 errorCode);
358 if (U_FAILURE(errorCode)) {
359 return;
360 }
361 if (offsets[0] == -1 || offsets[1] == -1) {
362 errorCode = U_INVALID_FORMAT_ERROR;
363 return;
364 }
365 if (recordOffset) {
366 offset = offsets[1];
367 } else if (offset >= 0) {
368 offset += offsets[0];
369 }
370 if (offsetFirst != nullptr) *offsetFirst = offsets[0];
371 if (offsetSecond != nullptr) *offsetSecond = offsets[1];
372 }
373
374 UnicodeString& ListFormatter::format(
375 const UnicodeString items[],
376 int32_t nItems,
377 UnicodeString& appendTo,
378 UErrorCode& errorCode) const {
379 int32_t offset;
380 return format(items, nItems, appendTo, -1, offset, errorCode);
381 }
382
383 #if !UCONFIG_NO_FORMATTING
384 UnicodeString& ListFormatter::format(
385 const UnicodeString items[],
386 int32_t nItems,
387 UnicodeString & appendTo,
388 FieldPositionIterator* posIter,
389 UErrorCode& errorCode) const {
390 int32_t offset;
391 FieldPositionIteratorHandler handler(posIter, errorCode);
392 return format_(items, nItems, appendTo, -1, offset, &handler, errorCode);
393 }
394 #endif
395
396 UnicodeString& ListFormatter::format(
397 const UnicodeString items[],
398 int32_t nItems,
399 UnicodeString& appendTo,
400 int32_t index,
401 int32_t &offset,
402 UErrorCode& errorCode) const {
403 return format_(items, nItems, appendTo, index, offset, nullptr, errorCode);
404 }
405
406 #if !UCONFIG_NO_FORMATTING
407 FormattedList ListFormatter::formatStringsToValue(
408 const UnicodeString items[],
409 int32_t nItems,
410 UErrorCode& errorCode) const {
411 LocalPointer<FormattedListData> result(new FormattedListData(errorCode), errorCode);
412 if (U_FAILURE(errorCode)) {
413 return FormattedList(errorCode);
414 }
415 UnicodeString string;
416 int32_t offset;
417 auto handler = result->getHandler(errorCode);
418 handler.setCategory(UFIELD_CATEGORY_LIST);
419 format_(items, nItems, string, -1, offset, &handler, errorCode);
420 handler.getError(errorCode);
421 result->appendString(string, errorCode);
422 if (U_FAILURE(errorCode)) {
423 return FormattedList(errorCode);
424 }
425
426 // Add span fields and sort
427 ConstrainedFieldPosition cfpos;
428 cfpos.constrainField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD);
429 int32_t i = 0;
430 handler.setCategory(UFIELD_CATEGORY_LIST_SPAN);
431 while (result->nextPosition(cfpos, errorCode)) {
432 handler.addAttribute(i++, cfpos.getStart(), cfpos.getLimit());
433 }
434 handler.getError(errorCode);
435 if (U_FAILURE(errorCode)) {
436 return FormattedList(errorCode);
437 }
438 result->sort();
439
440 return FormattedList(result.orphan());
441 }
442 #endif
443
444 UnicodeString& ListFormatter::format_(
445 const UnicodeString items[],
446 int32_t nItems,
447 UnicodeString& appendTo,
448 int32_t index,
449 int32_t &offset,
450 FieldPositionHandler* handler,
451 UErrorCode& errorCode) const {
452 #if !UCONFIG_NO_FORMATTING
453 offset = -1;
454 if (U_FAILURE(errorCode)) {
455 return appendTo;
456 }
457 if (data == nullptr) {
458 errorCode = U_INVALID_STATE_ERROR;
459 return appendTo;
460 }
461
462 if (nItems <= 0) {
463 return appendTo;
464 }
465 if (nItems == 1) {
466 if (index == 0) {
467 offset = appendTo.length();
468 }
469 if (handler != nullptr) {
470 handler->addAttribute(ULISTFMT_ELEMENT_FIELD,
471 appendTo.length(),
472 appendTo.length() + items[0].length());
473 }
474 appendTo.append(items[0]);
475 return appendTo;
476 }
477 UnicodeString result(items[0]);
478 if (index == 0) {
479 offset = 0;
480 }
481 int32_t offsetFirst;
482 int32_t offsetSecond;
483 int32_t prefixLength = 0;
484 // for n items, there are 2 * (n + 1) boundary including 0 and the upper
485 // edge.
486 MaybeStackArray<int32_t, 10> offsets((handler != nullptr) ? 2 * (nItems + 1): 0);
487 joinStringsAndReplace(
488 nItems == 2 ? data->twoPattern : data->startPattern,
489 result,
490 items[1],
491 result,
492 index == 1,
493 offset,
494 &offsetFirst,
495 &offsetSecond,
496 errorCode);
497 if (handler != nullptr) {
498 offsets[0] = 0;
499 prefixLength += offsetFirst;
500 offsets[1] = offsetSecond - prefixLength;
501 }
502 if (nItems > 2) {
503 for (int32_t i = 2; i < nItems - 1; ++i) {
504 joinStringsAndReplace(
505 data->middlePattern,
506 result,
507 items[i],
508 result,
509 index == i,
510 offset,
511 &offsetFirst,
512 &offsetSecond,
513 errorCode);
514 if (handler != nullptr) {
515 prefixLength += offsetFirst;
516 offsets[i] = offsetSecond - prefixLength;
517 }
518 }
519 joinStringsAndReplace(
520 data->endPattern,
521 result,
522 items[nItems - 1],
523 result,
524 index == nItems - 1,
525 offset,
526 &offsetFirst,
527 &offsetSecond,
528 errorCode);
529 if (handler != nullptr) {
530 prefixLength += offsetFirst;
531 offsets[nItems - 1] = offsetSecond - prefixLength;
532 }
533 }
534 if (handler != nullptr) {
535 // If there are already some data in appendTo, we need to adjust the index
536 // by shifting that lenght while insert into handler.
537 int32_t shift = appendTo.length() + prefixLength;
538 // Output the ULISTFMT_ELEMENT_FIELD in the order of the input elements
539 for (int32_t i = 0; i < nItems; ++i) {
540 offsets[i + nItems] = offsets[i] + items[i].length() + shift;
541 offsets[i] += shift;
542 handler->addAttribute(
543 ULISTFMT_ELEMENT_FIELD, // id
544 offsets[i], // index
545 offsets[i + nItems]); // limit
546 }
547 // The locale pattern may reorder the items (such as in ur-IN locale),
548 // so we cannot assume the array is in accendning order.
549 // To handle the edging case, just insert the two ends into the array
550 // and sort. Then we output ULISTFMT_LITERAL_FIELD if the indecies
551 // between the even and odd position are not the same in the sorted array.
552 offsets[2 * nItems] = shift - prefixLength;
553 offsets[2 * nItems + 1] = result.length() + shift - prefixLength;
554 uprv_sortArray(offsets.getAlias(), 2 * (nItems + 1), sizeof(int32_t),
555 uprv_int32Comparator, nullptr,
556 false, &errorCode);
557 for (int32_t i = 0; i <= nItems; ++i) {
558 if (offsets[i * 2] != offsets[i * 2 + 1]) {
559 handler->addAttribute(
560 ULISTFMT_LITERAL_FIELD, // id
561 offsets[i * 2], // index
562 offsets[i * 2 + 1]); // limit
563 }
564 }
565 }
566 if (U_SUCCESS(errorCode)) {
567 if (offset >= 0) {
568 offset += appendTo.length();
569 }
570 appendTo += result;
571 }
572 #endif
573 return appendTo;
574 }
575
576 U_NAMESPACE_END