]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/choicfmt.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / i18n / choicfmt.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4*******************************************************************************
57a6839d 5* Copyright (C) 1997-2013, International Business Machines Corporation and *
b75a7d8f
A
6* others. All Rights Reserved. *
7*******************************************************************************
8*
9* File CHOICFMT.CPP
10*
11* Modification History:
12*
13* Date Name Description
14* 02/19/97 aliu Converted from java.
15* 03/20/97 helena Finished first cut of implementation and got rid
16* of nextDouble/previousDouble and replaced with
17* boolean array.
18* 4/10/97 aliu Clean up. Modified to work on AIX.
19* 06/04/97 helena Fixed applyPattern(), toPattern() and not to include
20* wchar.h.
21* 07/09/97 helena Made ParsePosition into a class.
22* 08/06/97 nos removed overloaded constructor, fixed 'format(array)'
23* 07/22/98 stephen JDK 1.2 Sync - removed UBool array (doubleFlags)
24* 02/22/99 stephen Removed character literals for EBCDIC safety
25********************************************************************************
26*/
27
28#include "unicode/utypes.h"
29
30#if !UCONFIG_NO_FORMATTING
31
32#include "unicode/choicfmt.h"
33#include "unicode/numfmt.h"
34#include "unicode/locid.h"
35#include "cpputils.h"
374ca955 36#include "cstring.h"
4388f060 37#include "messageimpl.h"
374ca955 38#include "putilimp.h"
4388f060 39#include "uassert.h"
73c04bcf
A
40#include <stdio.h>
41#include <float.h>
b75a7d8f
A
42
43// *****************************************************************************
44// class ChoiceFormat
45// *****************************************************************************
46
47U_NAMESPACE_BEGIN
48
374ca955 49UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
b75a7d8f
A
50
51// Special characters used by ChoiceFormat. There are two characters
52// used interchangeably to indicate <=. Either is parsed, but only
53// LESS_EQUAL is generated by toPattern().
54#define SINGLE_QUOTE ((UChar)0x0027) /*'*/
55#define LESS_THAN ((UChar)0x003C) /*<*/
56#define LESS_EQUAL ((UChar)0x0023) /*#*/
57#define LESS_EQUAL2 ((UChar)0x2264)
58#define VERTICAL_BAR ((UChar)0x007C) /*|*/
59#define MINUS ((UChar)0x002D) /*-*/
729e4ab9 60
4388f060
A
61static const UChar LEFT_CURLY_BRACE = 0x7B; /*{*/
62static const UChar RIGHT_CURLY_BRACE = 0x7D; /*}*/
63
729e4ab9
A
64#ifdef INFINITY
65#undef INFINITY
66#endif
b75a7d8f
A
67#define INFINITY ((UChar)0x221E)
68
51004dcb
A
69//static const UChar gPositiveInfinity[] = {INFINITY, 0};
70//static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
b75a7d8f
A
71#define POSITIVE_INF_STRLEN 1
72#define NEGATIVE_INF_STRLEN 2
73
74// -------------------------------------
75// Creates a ChoiceFormat instance based on the pattern.
76
77ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
78 UErrorCode& status)
4388f060
A
79: constructorErrorCode(status),
80 msgPattern(status)
b75a7d8f
A
81{
82 applyPattern(newPattern, status);
83}
84
85// -------------------------------------
86// Creates a ChoiceFormat instance with the limit array and
87// format strings for each limit.
88
89ChoiceFormat::ChoiceFormat(const double* limits,
90 const UnicodeString* formats,
91 int32_t cnt )
4388f060
A
92: constructorErrorCode(U_ZERO_ERROR),
93 msgPattern(constructorErrorCode)
b75a7d8f 94{
4388f060 95 setChoices(limits, NULL, formats, cnt, constructorErrorCode);
b75a7d8f
A
96}
97
98// -------------------------------------
99
100ChoiceFormat::ChoiceFormat(const double* limits,
101 const UBool* closures,
102 const UnicodeString* formats,
103 int32_t cnt )
4388f060
A
104: constructorErrorCode(U_ZERO_ERROR),
105 msgPattern(constructorErrorCode)
b75a7d8f 106{
4388f060 107 setChoices(limits, closures, formats, cnt, constructorErrorCode);
b75a7d8f
A
108}
109
110// -------------------------------------
111// copy constructor
112
113ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
114: NumberFormat(that),
4388f060
A
115 constructorErrorCode(that.constructorErrorCode),
116 msgPattern(that.msgPattern)
b75a7d8f 117{
b75a7d8f
A
118}
119
120// -------------------------------------
121// Private constructor that creates a
122// ChoiceFormat instance based on the
123// pattern and populates UParseError
124
125ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
126 UParseError& parseError,
127 UErrorCode& status)
4388f060
A
128: constructorErrorCode(status),
129 msgPattern(status)
b75a7d8f
A
130{
131 applyPattern(newPattern,parseError, status);
132}
133// -------------------------------------
134
135UBool
136ChoiceFormat::operator==(const Format& that) const
137{
138 if (this == &that) return TRUE;
b75a7d8f
A
139 if (!NumberFormat::operator==(that)) return FALSE;
140 ChoiceFormat& thatAlias = (ChoiceFormat&)that;
4388f060 141 return msgPattern == thatAlias.msgPattern;
b75a7d8f
A
142}
143
144// -------------------------------------
145// copy constructor
146
147const ChoiceFormat&
148ChoiceFormat::operator=(const ChoiceFormat& that)
149{
150 if (this != &that) {
151 NumberFormat::operator=(that);
4388f060
A
152 constructorErrorCode = that.constructorErrorCode;
153 msgPattern = that.msgPattern;
b75a7d8f
A
154 }
155 return *this;
156}
157
158// -------------------------------------
159
160ChoiceFormat::~ChoiceFormat()
161{
b75a7d8f
A
162}
163
164// -------------------------------------
165
166/**
4388f060 167 * Convert a double value to a string without the overhead of NumberFormat.
b75a7d8f
A
168 */
169UnicodeString&
170ChoiceFormat::dtos(double value,
171 UnicodeString& string)
172{
73c04bcf
A
173 /* Buffer to contain the digits and any extra formatting stuff. */
174 char temp[DBL_DIG + 16];
175 char *itrPtr = temp;
46f4442e 176 char *expPtr;
73c04bcf 177
46f4442e 178 sprintf(temp, "%.*g", DBL_DIG, value);
73c04bcf
A
179
180 /* Find and convert the decimal point.
181 Using setlocale on some machines will cause sprintf to use a comma for certain locales.
182 */
183 while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
184 itrPtr++;
185 }
46f4442e
A
186 if (*itrPtr != 0 && *itrPtr != 'e') {
187 /* We reached something that looks like a decimal point.
188 In case someone used setlocale(), which changes the decimal point. */
73c04bcf 189 *itrPtr = '.';
46f4442e 190 itrPtr++;
73c04bcf 191 }
46f4442e
A
192 /* Search for the exponent */
193 while (*itrPtr && *itrPtr != 'e') {
194 itrPtr++;
195 }
196 if (*itrPtr == 'e') {
197 itrPtr++;
198 /* Verify the exponent sign */
199 if (*itrPtr == '+' || *itrPtr == '-') {
200 itrPtr++;
201 }
202 /* Remove leading zeros. You will see this on Windows machines. */
203 expPtr = itrPtr;
204 while (*itrPtr == '0') {
205 itrPtr++;
206 }
207 if (*itrPtr && expPtr != itrPtr) {
208 /* Shift the exponent without zeros. */
209 while (*itrPtr) {
210 *(expPtr++) = *(itrPtr++);
211 }
212 // NULL terminate
213 *expPtr = 0;
73c04bcf
A
214 }
215 }
46f4442e 216
374ca955 217 string = UnicodeString(temp, -1, US_INV); /* invariant codepage */
b75a7d8f
A
218 return string;
219}
220
221// -------------------------------------
222// calls the overloaded applyPattern method.
223
224void
225ChoiceFormat::applyPattern(const UnicodeString& pattern,
226 UErrorCode& status)
227{
4388f060
A
228 msgPattern.parseChoiceStyle(pattern, NULL, status);
229 constructorErrorCode = status;
b75a7d8f
A
230}
231
232// -------------------------------------
233// Applies the pattern to this ChoiceFormat instance.
234
235void
236ChoiceFormat::applyPattern(const UnicodeString& pattern,
237 UParseError& parseError,
238 UErrorCode& status)
239{
4388f060
A
240 msgPattern.parseChoiceStyle(pattern, &parseError, status);
241 constructorErrorCode = status;
b75a7d8f
A
242}
243// -------------------------------------
4388f060 244// Returns the input pattern string.
b75a7d8f
A
245
246UnicodeString&
247ChoiceFormat::toPattern(UnicodeString& result) const
248{
4388f060 249 return result = msgPattern.getPatternString();
b75a7d8f
A
250}
251
b75a7d8f
A
252// -------------------------------------
253// Sets the limit and format arrays.
254void
255ChoiceFormat::setChoices( const double* limits,
256 const UnicodeString* formats,
257 int32_t cnt )
258{
4388f060
A
259 UErrorCode errorCode = U_ZERO_ERROR;
260 setChoices(limits, NULL, formats, cnt, errorCode);
b75a7d8f
A
261}
262
263// -------------------------------------
264// Sets the limit and format arrays.
265void
266ChoiceFormat::setChoices( const double* limits,
267 const UBool* closures,
268 const UnicodeString* formats,
269 int32_t cnt )
270{
4388f060
A
271 UErrorCode errorCode = U_ZERO_ERROR;
272 setChoices(limits, closures, formats, cnt, errorCode);
273}
b75a7d8f 274
4388f060
A
275void
276ChoiceFormat::setChoices(const double* limits,
277 const UBool* closures,
278 const UnicodeString* formats,
279 int32_t count,
280 UErrorCode &errorCode) {
281 if (U_FAILURE(errorCode)) {
282 return;
46f4442e 283 }
4388f060
A
284 if (limits == NULL || formats == NULL) {
285 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
286 return;
46f4442e 287 }
4388f060
A
288 // Reconstruct the original input pattern.
289 // Modified version of the pre-ICU 4.8 toPattern() implementation.
290 UnicodeString result;
291 for (int32_t i = 0; i < count; ++i) {
292 if (i != 0) {
293 result += VERTICAL_BAR;
46f4442e 294 }
4388f060
A
295 UnicodeString buf;
296 if (uprv_isPositiveInfinity(limits[i])) {
297 result += INFINITY;
298 } else if (uprv_isNegativeInfinity(limits[i])) {
299 result += MINUS;
300 result += INFINITY;
301 } else {
302 result += dtos(limits[i], buf);
46f4442e 303 }
4388f060
A
304 if (closures != NULL && closures[i]) {
305 result += LESS_THAN;
306 } else {
307 result += LESS_EQUAL;
46f4442e 308 }
4388f060
A
309 // Append formats[i], using quotes if there are special
310 // characters. Single quotes themselves must be escaped in
311 // either case.
312 const UnicodeString& text = formats[i];
313 int32_t textLength = text.length();
314 int32_t nestingLevel = 0;
315 for (int32_t j = 0; j < textLength; ++j) {
316 UChar c = text[j];
317 if (c == SINGLE_QUOTE && nestingLevel == 0) {
318 // Double each top-level apostrophe.
319 result.append(c);
320 } else if (c == VERTICAL_BAR && nestingLevel == 0) {
321 // Surround each pipe symbol with apostrophes for quoting.
322 // If the next character is an apostrophe, then that will be doubled,
323 // and although the parser will see the apostrophe pairs beginning
324 // and ending one character earlier than our doubling, the result
325 // is as desired.
326 // | -> '|'
327 // |' -> '|'''
328 // |'' -> '|''''' etc.
329 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
330 continue; // Skip the append(c) at the end of the loop body.
331 } else if (c == LEFT_CURLY_BRACE) {
332 ++nestingLevel;
333 } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
334 --nestingLevel;
335 }
336 result.append(c);
b75a7d8f
A
337 }
338 }
4388f060
A
339 // Apply the reconstructed pattern.
340 applyPattern(result, errorCode);
b75a7d8f
A
341}
342
343// -------------------------------------
344// Gets the limit array.
345
346const double*
347ChoiceFormat::getLimits(int32_t& cnt) const
348{
4388f060
A
349 cnt = 0;
350 return NULL;
b75a7d8f
A
351}
352
353// -------------------------------------
354// Gets the closures array.
355
356const UBool*
357ChoiceFormat::getClosures(int32_t& cnt) const
358{
4388f060
A
359 cnt = 0;
360 return NULL;
b75a7d8f
A
361}
362
363// -------------------------------------
364// Gets the format array.
365
366const UnicodeString*
367ChoiceFormat::getFormats(int32_t& cnt) const
368{
4388f060
A
369 cnt = 0;
370 return NULL;
b75a7d8f
A
371}
372
374ca955
A
373// -------------------------------------
374// Formats an int64 number, it's actually formatted as
375// a double. The returned format string may differ
376// from the input number because of this.
377
378UnicodeString&
379ChoiceFormat::format(int64_t number,
380 UnicodeString& appendTo,
381 FieldPosition& status) const
382{
383 return format((double) number, appendTo, status);
384}
385
b75a7d8f 386// -------------------------------------
4388f060
A
387// Formats an int32_t number, it's actually formatted as
388// a double.
b75a7d8f
A
389
390UnicodeString&
391ChoiceFormat::format(int32_t number,
392 UnicodeString& appendTo,
393 FieldPosition& status) const
394{
395 return format((double) number, appendTo, status);
396}
397
398// -------------------------------------
399// Formats a double number.
400
401UnicodeString&
402ChoiceFormat::format(double number,
403 UnicodeString& appendTo,
404 FieldPosition& /*pos*/) const
405{
4388f060
A
406 if (msgPattern.countParts() == 0) {
407 // No pattern was applied, or it failed.
408 return appendTo;
409 }
410 // Get the appropriate sub-message.
411 int32_t msgStart = findSubMessage(msgPattern, 0, number);
412 if (!MessageImpl::jdkAposMode(msgPattern)) {
413 int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
414 int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
415 appendTo.append(msgPattern.getPatternString(),
416 patternStart,
417 msgPattern.getPatternIndex(msgLimit) - patternStart);
418 return appendTo;
419 }
420 // JDK compatibility mode: Remove SKIP_SYNTAX.
421 return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
422}
423
424int32_t
425ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
426 int32_t count = pattern.countParts();
427 int32_t msgStart;
428 // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
429 // until ARG_LIMIT or end of choice-only pattern.
430 // Ignore the first number and selector and start the loop on the first message.
431 partIndex += 2;
432 for (;;) {
433 // Skip but remember the current sub-message.
434 msgStart = partIndex;
435 partIndex = pattern.getLimitPartIndex(partIndex);
436 if (++partIndex >= count) {
437 // Reached the end of the choice-only pattern.
438 // Return with the last sub-message.
439 break;
440 }
441 const MessagePattern::Part &part = pattern.getPart(partIndex++);
442 UMessagePatternPartType type = part.getType();
443 if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
444 // Reached the end of the ChoiceFormat style.
445 // Return with the last sub-message.
446 break;
447 }
448 // part is an ARG_INT or ARG_DOUBLE
449 U_ASSERT(MessagePattern::Part::hasNumericValue(type));
450 double boundary = pattern.getNumericValue(part);
451 // Fetch the ARG_SELECTOR character.
452 int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
453 UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
454 if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
455 // The number is in the interval between the previous boundary and the current one.
456 // Return with the sub-message between them.
457 // The !(a>b) and !(a>=b) comparisons are equivalent to
458 // (a<=b) and (a<b) except they "catch" NaN.
b75a7d8f
A
459 break;
460 }
461 }
4388f060 462 return msgStart;
b75a7d8f
A
463}
464
465// -------------------------------------
466// Formats an array of objects. Checks if the data type of the objects
467// to get the right value for formatting.
468
469UnicodeString&
470ChoiceFormat::format(const Formattable* objs,
471 int32_t cnt,
472 UnicodeString& appendTo,
473 FieldPosition& pos,
474 UErrorCode& status) const
475{
476 if(cnt < 0) {
477 status = U_ILLEGAL_ARGUMENT_ERROR;
478 return appendTo;
479 }
4388f060
A
480 if (msgPattern.countParts() == 0) {
481 status = U_INVALID_STATE_ERROR;
482 return appendTo;
483 }
b75a7d8f 484
b75a7d8f 485 for (int32_t i = 0; i < cnt; i++) {
374ca955
A
486 double objDouble = objs[i].getDouble(status);
487 if (U_SUCCESS(status)) {
4388f060 488 format(objDouble, appendTo, pos);
374ca955 489 }
b75a7d8f
A
490 }
491
492 return appendTo;
493}
494
b75a7d8f
A
495// -------------------------------------
496
497void
498ChoiceFormat::parse(const UnicodeString& text,
499 Formattable& result,
4388f060 500 ParsePosition& pos) const
b75a7d8f 501{
4388f060
A
502 result.setDouble(parseArgument(msgPattern, 0, text, pos));
503}
504
505double
506ChoiceFormat::parseArgument(
507 const MessagePattern &pattern, int32_t partIndex,
508 const UnicodeString &source, ParsePosition &pos) {
b75a7d8f 509 // find the best number (defined as the one with the longest parse)
4388f060 510 int32_t start = pos.getIndex();
b75a7d8f
A
511 int32_t furthest = start;
512 double bestNumber = uprv_getNaN();
513 double tempNumber = 0.0;
4388f060
A
514 int32_t count = pattern.countParts();
515 while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
516 tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
517 partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR
518 int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
519 int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
520 if (len >= 0) {
521 int32_t newIndex = start + len;
522 if (newIndex > furthest) {
523 furthest = newIndex;
b75a7d8f 524 bestNumber = tempNumber;
4388f060 525 if (furthest == source.length()) {
b75a7d8f 526 break;
4388f060 527 }
b75a7d8f
A
528 }
529 }
4388f060 530 partIndex = msgLimit + 1;
b75a7d8f 531 }
4388f060
A
532 if (furthest == start) {
533 pos.setErrorIndex(start);
534 } else {
535 pos.setIndex(furthest);
536 }
537 return bestNumber;
538}
539
540int32_t
541ChoiceFormat::matchStringUntilLimitPart(
542 const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
543 const UnicodeString &source, int32_t sourceOffset) {
544 int32_t matchingSourceLength = 0;
545 const UnicodeString &msgString = pattern.getPatternString();
546 int32_t prevIndex = pattern.getPart(partIndex).getLimit();
547 for (;;) {
548 const MessagePattern::Part &part = pattern.getPart(++partIndex);
549 if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
550 int32_t index = part.getIndex();
551 int32_t length = index - prevIndex;
552 if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
553 return -1; // mismatch
554 }
555 matchingSourceLength += length;
556 if (partIndex == limitPartIndex) {
557 return matchingSourceLength;
558 }
559 prevIndex = part.getLimit(); // SKIP_SYNTAX
560 }
b75a7d8f 561 }
b75a7d8f
A
562}
563
b75a7d8f
A
564// -------------------------------------
565
340931cb 566ChoiceFormat*
b75a7d8f
A
567ChoiceFormat::clone() const
568{
569 ChoiceFormat *aCopy = new ChoiceFormat(*this);
570 return aCopy;
571}
572
573U_NAMESPACE_END
574
575#endif /* #if !UCONFIG_NO_FORMATTING */
576
577//eof