icuSources/i18n/choicfmt.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 *******************************************************************************
   5 * Copyright (C) 1997-2013, International Business Machines Corporation and    *
   6 * others. All Rights Reserved.                                                *
   7 *******************************************************************************
   8 *
   9 * File CHOICFMT.CPP
  10 *
  11 * Modification History:
  12 *
  13 *   Date        Name        Description
  14 *   02/19/97    aliu        Converted from java.
  15 *   03/20/97    helena      Finished first cut of implementation and got rid
  16 *                           of nextDouble/previousDouble and replaced with
  17 *                           boolean array.
  18 *   4/10/97     aliu        Clean up.  Modified to work on AIX.
  19 *   06/04/97    helena      Fixed applyPattern(), toPattern() and not to include
  20 *                           wchar.h.
  21 *   07/09/97    helena      Made ParsePosition into a class.
  22 *   08/06/97    nos         removed overloaded constructor, fixed 'format(array)'
  23 *   07/22/98    stephen     JDK 1.2 Sync - removed UBool array (doubleFlags)
  24 *   02/22/99    stephen     Removed character literals for EBCDIC safety
  25 ********************************************************************************
  26 */
  27
  28 #include "unicode/utypes.h"
  29
  30 #if !UCONFIG_NO_FORMATTING
  31
  32 #include "unicode/choicfmt.h"
  33 #include "unicode/numfmt.h"
  34 #include "unicode/locid.h"
  35 #include "cpputils.h"
  36 #include "cstring.h"
  37 #include "messageimpl.h"
  38 #include "putilimp.h"
  39 #include "uassert.h"
  40 #include <stdio.h>
  41 #include <float.h>
  42
  43 // *****************************************************************************
  44 // class ChoiceFormat
  45 // *****************************************************************************
  46
  47 U_NAMESPACE_BEGIN
  48
  49 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
  50
// Special characters used by ChoiceFormat.  There are two characters
// used to indicate <= (LESS_EQUAL and LESS_EQUAL2).  In this file only
// LESS_EQUAL is ever written into a pattern: setChoices() emits it when it
// rebuilds a pattern string, and toPattern() returns the stored pattern
// text as-is.  LESS_EQUAL2 is not referenced in this part of the file;
// presumably the pattern parser accepts it as an alternative -- TODO confirm.
#define SINGLE_QUOTE ((UChar)0x0027)   /*'*/
#define LESS_THAN    ((UChar)0x003C)   /*<*/
#define LESS_EQUAL   ((UChar)0x0023)   /*#*/
#define LESS_EQUAL2  ((UChar)0x2264)
#define VERTICAL_BAR ((UChar)0x007C)   /*|*/
#define MINUS        ((UChar)0x002D)   /*-*/

// Curly braces; setChoices() uses them to track nesting depth inside a
// format string.
static const UChar LEFT_CURLY_BRACE = 0x7B;     /*{*/
static const UChar RIGHT_CURLY_BRACE = 0x7D;    /*}*/

// Drop any INFINITY macro that is already defined (presumably by a system
// math header) so the name can be reused here for the infinity sign U+221E.
#ifdef INFINITY
#undef INFINITY
#endif
#define INFINITY     ((UChar)0x221E)

//static const UChar gPositiveInfinity[] = {INFINITY, 0};
//static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
// Lengths (in UChars) of the two infinity strings sketched in the
// commented-out arrays above.
#define POSITIVE_INF_STRLEN 1
#define NEGATIVE_INF_STRLEN 2
  73
  74 // -------------------------------------
  75 // Creates a ChoiceFormat instance based on the pattern.
  76
  77 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
  78                            UErrorCode& status)
  79 : constructorErrorCode(status),
  80   msgPattern(status)
  81 {
  82     applyPattern(newPattern, status);
  83 }
  84
  85 // -------------------------------------
  86 // Creates a ChoiceFormat instance with the limit array and
  87 // format strings for each limit.
  88
  89 ChoiceFormat::ChoiceFormat(const double* limits,
  90                            const UnicodeString* formats,
  91                            int32_t cnt )
  92 : constructorErrorCode(U_ZERO_ERROR),
  93   msgPattern(constructorErrorCode)
  94 {
  95     setChoices(limits, NULL, formats, cnt, constructorErrorCode);
  96 }
  97
  98 // -------------------------------------
  99
 100 ChoiceFormat::ChoiceFormat(const double* limits,
 101                            const UBool* closures,
 102                            const UnicodeString* formats,
 103                            int32_t cnt )
 104 : constructorErrorCode(U_ZERO_ERROR),
 105   msgPattern(constructorErrorCode)
 106 {
 107     setChoices(limits, closures, formats, cnt, constructorErrorCode);
 108 }
 109
 110 // -------------------------------------
 111 // copy constructor
 112
 113 ChoiceFormat::ChoiceFormat(const    ChoiceFormat&   that)
 114 : NumberFormat(that),
 115   constructorErrorCode(that.constructorErrorCode),
 116   msgPattern(that.msgPattern)
 117 {
 118 }
 119
 120 // -------------------------------------
 121 // Private constructor that creates a
 122 // ChoiceFormat instance based on the
 123 // pattern and populates UParseError
 124
 125 ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
 126                            UParseError& parseError,
 127                            UErrorCode& status)
 128 : constructorErrorCode(status),
 129   msgPattern(status)
 130 {
 131     applyPattern(newPattern,parseError, status);
 132 }
 133 // -------------------------------------
 134
 135 UBool
 136 ChoiceFormat::operator==(const Format& that) const
 137 {
 138     if (this == &that) return TRUE;
 139     if (!NumberFormat::operator==(that)) return FALSE;
 140     ChoiceFormat& thatAlias = (ChoiceFormat&)that;
 141     return msgPattern == thatAlias.msgPattern;
 142 }
 143
 144 // -------------------------------------
 145 // copy constructor
 146
 147 const ChoiceFormat&
 148 ChoiceFormat::operator=(const   ChoiceFormat& that)
 149 {
 150     if (this != &that) {
 151         NumberFormat::operator=(that);
 152         constructorErrorCode = that.constructorErrorCode;
 153         msgPattern = that.msgPattern;
 154     }
 155     return *this;
 156 }
 157
 158 // -------------------------------------
 159
 160 ChoiceFormat::~ChoiceFormat()
 161 {
 162 }
 163
 164 // -------------------------------------
 165
 166 /**
 167  * Convert a double value to a string without the overhead of NumberFormat.
 168  */
 169 UnicodeString&
 170 ChoiceFormat::dtos(double value,
 171                    UnicodeString& string)
 172 {
 173     /* Buffer to contain the digits and any extra formatting stuff. */
 174     char temp[DBL_DIG + 16];
 175     char *itrPtr = temp;
 176     char *expPtr;
 177
 178     sprintf(temp, "%.*g", DBL_DIG, value);
 179
 180     /* Find and convert the decimal point.
 181        Using setlocale on some machines will cause sprintf to use a comma for certain locales.
 182     */
 183     while (*itrPtr && (*itrPtr == '-' || isdigit(*itrPtr))) {
 184         itrPtr++;
 185     }
 186     if (*itrPtr != 0 && *itrPtr != 'e') {
 187         /* We reached something that looks like a decimal point.
 188         In case someone used setlocale(), which changes the decimal point. */
 189         *itrPtr = '.';
 190         itrPtr++;
 191     }
 192     /* Search for the exponent */
 193     while (*itrPtr && *itrPtr != 'e') {
 194         itrPtr++;
 195     }
 196     if (*itrPtr == 'e') {
 197         itrPtr++;
 198         /* Verify the exponent sign */
 199         if (*itrPtr == '+' || *itrPtr == '-') {
 200             itrPtr++;
 201         }
 202         /* Remove leading zeros. You will see this on Windows machines. */
 203         expPtr = itrPtr;
 204         while (*itrPtr == '0') {
 205             itrPtr++;
 206         }
 207         if (*itrPtr && expPtr != itrPtr) {
 208             /* Shift the exponent without zeros. */
 209             while (*itrPtr) {
 210                 *(expPtr++)  = *(itrPtr++);
 211             }
 212             // NULL terminate
 213             *expPtr = 0;
 214         }
 215     }
 216
 217     string = UnicodeString(temp, -1, US_INV);    /* invariant codepage */
 218     return string;
 219 }
 220
 221 // -------------------------------------
 222 // calls the overloaded applyPattern method.
 223
 224 void
 225 ChoiceFormat::applyPattern(const UnicodeString& pattern,
 226                            UErrorCode& status)
 227 {
 228     msgPattern.parseChoiceStyle(pattern, NULL, status);
 229     constructorErrorCode = status;
 230 }
 231
 232 // -------------------------------------
 233 // Applies the pattern to this ChoiceFormat instance.
 234
 235 void
 236 ChoiceFormat::applyPattern(const UnicodeString& pattern,
 237                            UParseError& parseError,
 238                            UErrorCode& status)
 239 {
 240     msgPattern.parseChoiceStyle(pattern, &parseError, status);
 241     constructorErrorCode = status;
 242 }
 243 // -------------------------------------
 244 // Returns the input pattern string.
 245
 246 UnicodeString&
 247 ChoiceFormat::toPattern(UnicodeString& result) const
 248 {
 249     return result = msgPattern.getPatternString();
 250 }
 251
 252 // -------------------------------------
 253 // Sets the limit and format arrays.
 254 void
 255 ChoiceFormat::setChoices(  const double* limits,
 256                            const UnicodeString* formats,
 257                            int32_t cnt )
 258 {
 259     UErrorCode errorCode = U_ZERO_ERROR;
 260     setChoices(limits, NULL, formats, cnt, errorCode);
 261 }
 262
 263 // -------------------------------------
 264 // Sets the limit and format arrays.
 265 void
 266 ChoiceFormat::setChoices(  const double* limits,
 267                            const UBool* closures,
 268                            const UnicodeString* formats,
 269                            int32_t cnt )
 270 {
 271     UErrorCode errorCode = U_ZERO_ERROR;
 272     setChoices(limits, closures, formats, cnt, errorCode);
 273 }
 274
 275 void
 276 ChoiceFormat::setChoices(const double* limits,
 277                          const UBool* closures,
 278                          const UnicodeString* formats,
 279                          int32_t count,
 280                          UErrorCode &errorCode) {
 281     if (U_FAILURE(errorCode)) {
 282         return;
 283     }
 284     if (limits == NULL || formats == NULL) {
 285         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
 286         return;
 287     }
 288     // Reconstruct the original input pattern.
 289     // Modified version of the pre-ICU 4.8 toPattern() implementation.
 290     UnicodeString result;
 291     for (int32_t i = 0; i < count; ++i) {
 292         if (i != 0) {
 293             result += VERTICAL_BAR;
 294         }
 295         UnicodeString buf;
 296         if (uprv_isPositiveInfinity(limits[i])) {
 297             result += INFINITY;
 298         } else if (uprv_isNegativeInfinity(limits[i])) {
 299             result += MINUS;
 300             result += INFINITY;
 301         } else {
 302             result += dtos(limits[i], buf);
 303         }
 304         if (closures != NULL && closures[i]) {
 305             result += LESS_THAN;
 306         } else {
 307             result += LESS_EQUAL;
 308         }
 309         // Append formats[i], using quotes if there are special
 310         // characters.  Single quotes themselves must be escaped in
 311         // either case.
 312         const UnicodeString& text = formats[i];
 313         int32_t textLength = text.length();
 314         int32_t nestingLevel = 0;
 315         for (int32_t j = 0; j < textLength; ++j) {
 316             UChar c = text[j];
 317             if (c == SINGLE_QUOTE && nestingLevel == 0) {
 318                 // Double each top-level apostrophe.
 319                 result.append(c);
 320             } else if (c == VERTICAL_BAR && nestingLevel == 0) {
 321                 // Surround each pipe symbol with apostrophes for quoting.
 322                 // If the next character is an apostrophe, then that will be doubled,
 323                 // and although the parser will see the apostrophe pairs beginning
 324                 // and ending one character earlier than our doubling, the result
 325                 // is as desired.
 326                 //   | -> '|'
 327                 //   |' -> '|'''
 328                 //   |'' -> '|''''' etc.
 329                 result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
 330                 continue;  // Skip the append(c) at the end of the loop body.
 331             } else if (c == LEFT_CURLY_BRACE) {
 332                 ++nestingLevel;
 333             } else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
 334                 --nestingLevel;
 335             }
 336             result.append(c);
 337         }
 338     }
 339     // Apply the reconstructed pattern.
 340     applyPattern(result, errorCode);
 341 }
 342
 343 // -------------------------------------
 344 // Gets the limit array.
 345
 346 const double*
 347 ChoiceFormat::getLimits(int32_t& cnt) const
 348 {
 349     cnt = 0;
 350     return NULL;
 351 }
 352
 353 // -------------------------------------
 354 // Gets the closures array.
 355
 356 const UBool*
 357 ChoiceFormat::getClosures(int32_t& cnt) const
 358 {
 359     cnt = 0;
 360     return NULL;
 361 }
 362
 363 // -------------------------------------
 364 // Gets the format array.
 365
 366 const UnicodeString*
 367 ChoiceFormat::getFormats(int32_t& cnt) const
 368 {
 369     cnt = 0;
 370     return NULL;
 371 }
 372
 373 // -------------------------------------
 374 // Formats an int64 number, it's actually formatted as
 375 // a double.  The returned format string may differ
 376 // from the input number because of this.
 377
 378 UnicodeString&
 379 ChoiceFormat::format(int64_t number,
 380                      UnicodeString& appendTo,
 381                      FieldPosition& status) const
 382 {
 383     return format((double) number, appendTo, status);
 384 }
 385
 386 // -------------------------------------
 387 // Formats an int32_t number, it's actually formatted as
 388 // a double.
 389
 390 UnicodeString&
 391 ChoiceFormat::format(int32_t number,
 392                      UnicodeString& appendTo,
 393                      FieldPosition& status) const
 394 {
 395     return format((double) number, appendTo, status);
 396 }
 397
 398 // -------------------------------------
 399 // Formats a double number.
 400
 401 UnicodeString&
 402 ChoiceFormat::format(double number,
 403                      UnicodeString& appendTo,
 404                      FieldPosition& /*pos*/) const
 405 {
 406     if (msgPattern.countParts() == 0) {
 407         // No pattern was applied, or it failed.
 408         return appendTo;
 409     }
 410     // Get the appropriate sub-message.
 411     int32_t msgStart = findSubMessage(msgPattern, 0, number);
 412     if (!MessageImpl::jdkAposMode(msgPattern)) {
 413         int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
 414         int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
 415         appendTo.append(msgPattern.getPatternString(),
 416                         patternStart,
 417                         msgPattern.getPatternIndex(msgLimit) - patternStart);
 418         return appendTo;
 419     }
 420     // JDK compatibility mode: Remove SKIP_SYNTAX.
 421     return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
 422 }
 423
 424 int32_t
 425 ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
 426     int32_t count = pattern.countParts();
 427     int32_t msgStart;
 428     // Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
 429     // until ARG_LIMIT or end of choice-only pattern.
 430     // Ignore the first number and selector and start the loop on the first message.
 431     partIndex += 2;
 432     for (;;) {
 433         // Skip but remember the current sub-message.
 434         msgStart = partIndex;
 435         partIndex = pattern.getLimitPartIndex(partIndex);
 436         if (++partIndex >= count) {
 437             // Reached the end of the choice-only pattern.
 438             // Return with the last sub-message.
 439             break;
 440         }
 441         const MessagePattern::Part &part = pattern.getPart(partIndex++);
 442         UMessagePatternPartType type = part.getType();
 443         if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
 444             // Reached the end of the ChoiceFormat style.
 445             // Return with the last sub-message.
 446             break;
 447         }
 448         // part is an ARG_INT or ARG_DOUBLE
 449         U_ASSERT(MessagePattern::Part::hasNumericValue(type));
 450         double boundary = pattern.getNumericValue(part);
 451         // Fetch the ARG_SELECTOR character.
 452         int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
 453         UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
 454         if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
 455             // The number is in the interval between the previous boundary and the current one.
 456             // Return with the sub-message between them.
 457             // The !(a>b) and !(a>=b) comparisons are equivalent to
 458             // (a<=b) and (a<b) except they "catch" NaN.
 459             break;
 460         }
 461     }
 462     return msgStart;
 463 }
 464
 465 // -------------------------------------
 466 // Formats an array of objects. Checks if the data type of the objects
 467 // to get the right value for formatting.
 468
 469 UnicodeString&
 470 ChoiceFormat::format(const Formattable* objs,
 471                      int32_t cnt,
 472                      UnicodeString& appendTo,
 473                      FieldPosition& pos,
 474                      UErrorCode& status) const
 475 {
 476     if(cnt < 0) {
 477         status = U_ILLEGAL_ARGUMENT_ERROR;
 478         return appendTo;
 479     }
 480     if (msgPattern.countParts() == 0) {
 481         status = U_INVALID_STATE_ERROR;
 482         return appendTo;
 483     }
 484
 485     for (int32_t i = 0; i < cnt; i++) {
 486         double objDouble = objs[i].getDouble(status);
 487         if (U_SUCCESS(status)) {
 488             format(objDouble, appendTo, pos);
 489         }
 490     }
 491
 492     return appendTo;
 493 }
 494
 495 // -------------------------------------
 496
 497 void
 498 ChoiceFormat::parse(const UnicodeString& text,
 499                     Formattable& result,
 500                     ParsePosition& pos) const
 501 {
 502     result.setDouble(parseArgument(msgPattern, 0, text, pos));
 503 }
 504
 505 double
 506 ChoiceFormat::parseArgument(
 507         const MessagePattern &pattern, int32_t partIndex,
 508         const UnicodeString &source, ParsePosition &pos) {
 509     // find the best number (defined as the one with the longest parse)
 510     int32_t start = pos.getIndex();
 511     int32_t furthest = start;
 512     double bestNumber = uprv_getNaN();
 513     double tempNumber = 0.0;
 514     int32_t count = pattern.countParts();
 515     while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
 516         tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
 517         partIndex += 2;  // skip the numeric part and ignore the ARG_SELECTOR
 518         int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
 519         int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
 520         if (len >= 0) {
 521             int32_t newIndex = start + len;
 522             if (newIndex > furthest) {
 523                 furthest = newIndex;
 524                 bestNumber = tempNumber;
 525                 if (furthest == source.length()) {
 526                     break;
 527                 }
 528             }
 529         }
 530         partIndex = msgLimit + 1;
 531     }
 532     if (furthest == start) {
 533         pos.setErrorIndex(start);
 534     } else {
 535         pos.setIndex(furthest);
 536     }
 537     return bestNumber;
 538 }
 539
 540 int32_t
 541 ChoiceFormat::matchStringUntilLimitPart(
 542         const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
 543         const UnicodeString &source, int32_t sourceOffset) {
 544     int32_t matchingSourceLength = 0;
 545     const UnicodeString &msgString = pattern.getPatternString();
 546     int32_t prevIndex = pattern.getPart(partIndex).getLimit();
 547     for (;;) {
 548         const MessagePattern::Part &part = pattern.getPart(++partIndex);
 549         if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
 550             int32_t index = part.getIndex();
 551             int32_t length = index - prevIndex;
 552             if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
 553                 return -1;  // mismatch
 554             }
 555             matchingSourceLength += length;
 556             if (partIndex == limitPartIndex) {
 557                 return matchingSourceLength;
 558             }
 559             prevIndex = part.getLimit();  // SKIP_SYNTAX
 560         }
 561     }
 562 }
 563
 564 // -------------------------------------
 565
 566 Format*
 567 ChoiceFormat::clone() const
 568 {
 569     ChoiceFormat *aCopy = new ChoiceFormat(*this);
 570     return aCopy;
 571 }
 572
 573 U_NAMESPACE_END
 574
 575 #endif /* #if !UCONFIG_NO_FORMATTING */
 576
 577 //eof