icuSources/i18n/digitlst.cpp

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 1997-2004, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *
   7 * File DIGITLST.CPP
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *   03/21/97    clhuang     Converted from java.
  13 *   03/21/97    clhuang     Implemented with new APIs.
  14 *   03/27/97    helena      Updated to pass the simple test after code review.
  15 *   03/31/97    aliu        Moved isLONG_MIN to here, and fixed it.
  16 *   04/15/97    aliu        Changed MAX_COUNT to DBL_DIG.  Changed Digit to char.
  17 *                           Reworked representation by replacing fDecimalAt
  18 *                           with fExponent.
  19 *   04/16/97    aliu        Rewrote set() and getDouble() to use sprintf/atof
  20 *                           to do digit conversion.
  21 *   09/09/97    aliu        Modified for exponential notation support.
  22 *   08/02/98    stephen     Added nearest/even rounding
  23 *                            Fixed bug in fitsIntoLong
  24 ******************************************************************************
  25 */
  26
  27 #include "unicode/putil.h"
  28 #include "digitlst.h"
  29 #include "cstring.h"
  30 #include "putilimp.h"
  31 #include <stdlib.h>
  32 #include <limits.h>
  33 #include <string.h>
  34 #include <stdio.h>
  35
  36 // ***************************************************************************
  37 // class DigitList
  38 // This class handles the transcoding between numeric values and strings of
   39 //  characters.  Only handles non-negative numbers.
  40 // ***************************************************************************
  41
/**
 * This is the zero digit.  Array elements fDigits[i] have values from
 * kZero to kZero + 9.  Typically, this is '0'.
 */
#define kZero '0'

// Cached decimal-separator character as produced by sprintf().  It stays 0
// until getDouble() probes sprintf() for it the first time.
static char gDecimal = 0;

/*
 * Decimal digits of the magnitudes of the most negative 32-bit and 64-bit
 * integers (the negative sign is omitted).  fitsIntoLong() and
 * fitsIntoInt64() compare digit lists against these strings.
 */
static const char LONG_MIN_REP[] = "2147483648";
static const char I64_MIN_REP[] = "9223372036854775808";

// NOTE(review): not referenced anywhere else in the visible part of this file.
static const int64_t I64_MIN_VALUE = U_INT64_MIN;

// Number of digits in each of the strings above.
enum {
    LONG_MIN_REP_LENGTH = sizeof(LONG_MIN_REP) - 1, //Ignore the NULL at the end
    I64_MIN_REP_LENGTH = sizeof(I64_MIN_REP) - 1 //Ignore the NULL at the end
};
  60
  61 U_NAMESPACE_BEGIN
  62
  63
  64 // -------------------------------------
  65 // default constructor
  66
  67 DigitList::DigitList()
  68 {
  69     fDigits = fDecimalDigits + 1;   // skip the decimal
  70     clear();
  71 }
  72
  73 // -------------------------------------
  74
  75 DigitList::~DigitList()
  76 {
  77 }
  78
  79 // -------------------------------------
  80 // copy constructor
  81
  82 DigitList::DigitList(const DigitList &other)
  83 {
  84     fDigits = fDecimalDigits + 1;   // skip the decimal
  85     *this = other;
  86 }
  87
  88 // -------------------------------------
  89 // assignment operator
  90
  91 DigitList&
  92 DigitList::operator=(const DigitList& other)
  93 {
  94     if (this != &other)
  95     {
  96         fDecimalAt = other.fDecimalAt;
  97         fCount = other.fCount;
  98         fIsPositive = other.fIsPositive;
  99         fRoundingMode = other.fRoundingMode;
 100         uprv_strncpy(fDigits, other.fDigits, fCount);
 101     }
 102     return *this;
 103 }
 104
 105 // -------------------------------------
 106
 107 UBool
 108 DigitList::operator==(const DigitList& that) const
 109 {
 110     return ((this == &that) ||
 111             (fDecimalAt == that.fDecimalAt &&
 112              fCount == that.fCount &&
 113              fIsPositive == that.fIsPositive &&
 114              fRoundingMode == that.fRoundingMode &&
 115              uprv_strncmp(fDigits, that.fDigits, fCount) == 0));
 116 }
 117
 118 // -------------------------------------
 119 // Resets the digit list; sets all the digits to zero.
 120
 121 void
 122 DigitList::clear()
 123 {
 124     fDecimalAt = 0;
 125     fCount = 0;
 126     fIsPositive = TRUE;
 127     fRoundingMode = DecimalFormat::kRoundHalfEven;
 128
 129     // Don't bother initializing fDigits because fCount is 0.
 130 }
 131
 132
 133
 134 // -------------------------------------
 135
 136 /**
 137  * Formats a number into a base 10 string representation, and NULL terminates it.
 138  * @param number The number to format
 139  * @param outputStr The string to output to
 140  * @param outputLen The maximum number of characters to put into outputStr
 141  *                  (including NULL).
 142  * @return the number of digits written, not including the sign.
 143  */
 144 static int32_t
 145 formatBase10(int64_t number, char *outputStr, int32_t outputLen)
 146 {
 147     char buffer[MAX_DIGITS + 1];
 148     int32_t bufferLen;
 149     int32_t result;
 150
 151     if (outputLen > MAX_DIGITS) {
 152         outputLen = MAX_DIGITS;     // Ignore NULL
 153     }
 154     else if (outputLen < 3) {
 155         return 0;                   // Not enough room
 156     }
 157
 158     bufferLen = outputLen;
 159
 160     if (number < 0) {   // Negative numbers are slightly larger than a postive
 161         buffer[bufferLen--] = (char)(-(number % 10) + kZero);
 162         number /= -10;
 163         *(outputStr++) = '-';
 164     }
 165     else {
 166         *(outputStr++) = '+';    // allow +0
 167     }
 168     while (bufferLen >= 0 && number) {      // Output the number
 169         buffer[bufferLen--] = (char)(number % 10 + kZero);
 170         number /= 10;
 171     }
 172
 173     result = outputLen - bufferLen++;
 174
 175     while (bufferLen <= outputLen) {     // Copy the number to output
 176         *(outputStr++) = buffer[bufferLen++];
 177     }
 178     *outputStr = 0;   // NULL terminate.
 179     return result;
 180 }
 181
 182 /**
 183  * Currently, getDouble() depends on atof() to do its conversion.
 184  *
 185  * WARNING!!
 186  * This is an extremely costly function. ~1/2 of the conversion time
 187  * can be linked to this function.
 188  */
 189 double
 190 DigitList::getDouble() /*const*/
 191 {
 192     double value;
 193
 194     if (fCount == 0) {
 195         value = 0.0;
 196     }
 197     else {
 198         char* end = NULL;
 199         if (!gDecimal) {
 200             char rep[MAX_DIGITS];
 201             // For machines that decide to change the decimal on you,
 202             // and try to be too smart with localization.
 203             // This normally should be just a '.'.
 204             sprintf(rep, "%+1.1f", 1.0);
 205             gDecimal = rep[2];
 206         }
 207
 208         *fDecimalDigits = gDecimal;
 209         *(fDigits+fCount) = 'e';    // add an e after the digits.
 210         formatBase10(fDecimalAt,
 211                      fDigits + fCount + 1,  // skip the 'e'
 212                      MAX_DEC_DIGITS - fCount - 3);  // skip the 'e' and '.'
 213         value = uprv_strtod(fDecimalDigits, &end);
 214     }
 215
 216     return fIsPositive ? value : -value;
 217 }
 218
 219 // -------------------------------------
 220
 221 /**
 222  * Make sure that fitsIntoLong() is called before calling this function.
 223  */
 224 int32_t DigitList::getLong() /*const*/
 225 {
 226     if (fCount == fDecimalAt) {
 227         int32_t value;
 228
 229         fDigits[fCount] = 0;    // NULL terminate
 230
 231         // This conversion is bad on 64-bit platforms when we want to
 232         // be able to return a 64-bit number [grhoten]
 233         *fDecimalDigits = fIsPositive ? '+' : '-';
 234         value = (int32_t)atol(fDecimalDigits);
 235         return value;
 236     }
 237     else {
 238         // This is 100% accurate in c++ because if we are representing
 239         // an integral value, we suffer nothing in the conversion to
 240         // double.  If we are to support 64-bit longs later, getLong()
 241         // must be rewritten. [LIU]
 242         return (int32_t)getDouble();
 243     }
 244 }
 245
 246
 247 /**
 248  * Make sure that fitsIntoInt64() is called before calling this function.
 249  */
 250 int64_t DigitList::getInt64() /*const*/
 251 {
 252     if (fCount == fDecimalAt) {
 253         uint64_t value;
 254
 255         fDigits[fCount] = 0;    // NULL terminate
 256
 257         // This conversion is bad on 64-bit platforms when we want to
 258         // be able to return a 64-bit number [grhoten]
 259         *fDecimalDigits = fIsPositive ? '+' : '-';
 260
 261         if (fCount < LONG_MIN_REP_LENGTH) {
 262             return (int64_t)atol(fDecimalDigits);
 263         }
 264
 265         // too big for atol, hand-roll atoi64
 266         value = 0;
 267         for (int i = 0; i < fCount; ++i) {
 268             int v = fDigits[i] - kZero;
 269             value = value * (uint64_t)10 + (uint64_t)v;
 270         }
 271         if (!fIsPositive) {
 272             value = ~value;
 273             value += 1;
 274         }
 275         int64_t svalue = (int64_t)value;
 276         return svalue;
 277     }
 278     else {
 279         // todo: figure out best approach
 280
 281         // This is 100% accurate in c++ because if we are representing
 282         // an integral value, we suffer nothing in the conversion to
 283         // double.  If we are to support 64-bit longs later, getLong()
 284         // must be rewritten. [LIU]
 285         return (int64_t)getDouble();
 286     }
 287 }
 288
 289 /**
 290  * Return true if the number represented by this object can fit into
 291  * a long.
 292  */
 293 UBool
 294 DigitList::fitsIntoLong(UBool ignoreNegativeZero) /*const*/
 295 {
 296     // Figure out if the result will fit in a long.  We have to
 297     // first look for nonzero digits after the decimal point;
 298     // then check the size.
 299
 300     // Trim trailing zeros after the decimal point. This does not change
 301     // the represented value.
 302     while (fCount > fDecimalAt && fCount > 0 && fDigits[fCount - 1] == kZero)
 303         --fCount;
 304
 305     if (fCount == 0) {
 306         // Positive zero fits into a long, but negative zero can only
 307         // be represented as a double. - bug 4162852
 308         return fIsPositive || ignoreNegativeZero;
 309     }
 310
 311     // If the digit list represents a double or this number is too
 312     // big for a long.
 313     if (fDecimalAt < fCount || fDecimalAt > LONG_MIN_REP_LENGTH)
 314         return FALSE;
 315
 316     // If number is small enough to fit in a long
 317     if (fDecimalAt < LONG_MIN_REP_LENGTH)
 318         return TRUE;
 319
 320     // At this point we have fDecimalAt == fCount, and fCount == LONG_MIN_REP_LENGTH.
 321     // The number will overflow if it is larger than LONG_MAX
 322     // or smaller than LONG_MIN.
 323     for (int32_t i=0; i<fCount; ++i)
 324     {
 325         char dig = fDigits[i],
 326              max = LONG_MIN_REP[i];
 327         if (dig > max)
 328             return FALSE;
 329         if (dig < max)
 330             return TRUE;
 331     }
 332
 333     // At this point the first count digits match.  If fDecimalAt is less
 334     // than count, then the remaining digits are zero, and we return true.
 335     if (fCount < fDecimalAt)
 336         return TRUE;
 337
 338     // Now we have a representation of Long.MIN_VALUE, without the leading
 339     // negative sign.  If this represents a positive value, then it does
 340     // not fit; otherwise it fits.
 341     return !fIsPositive;
 342 }
 343
 344 /**
 345  * Return true if the number represented by this object can fit into
 346  * a long.
 347  */
 348 UBool
 349 DigitList::fitsIntoInt64(UBool ignoreNegativeZero) /*const*/
 350 {
 351     // Figure out if the result will fit in a long.  We have to
 352     // first look for nonzero digits after the decimal point;
 353     // then check the size.
 354
 355     // Trim trailing zeros after the decimal point. This does not change
 356     // the represented value.
 357     while (fCount > fDecimalAt && fCount > 0 && fDigits[fCount - 1] == kZero)
 358         --fCount;
 359
 360     if (fCount == 0) {
 361         // Positive zero fits into a long, but negative zero can only
 362         // be represented as a double. - bug 4162852
 363         return fIsPositive || ignoreNegativeZero;
 364     }
 365
 366     // If the digit list represents a double or this number is too
 367     // big for a long.
 368     if (fDecimalAt < fCount || fDecimalAt > I64_MIN_REP_LENGTH)
 369         return FALSE;
 370
 371     // If number is small enough to fit in an int64
 372     if (fDecimalAt < I64_MIN_REP_LENGTH)
 373         return TRUE;
 374
 375     // At this point we have fDecimalAt == fCount, and fCount == INT64_MIN_REP_LENGTH.
 376     // The number will overflow if it is larger than U_INT64_MAX
 377     // or smaller than U_INT64_MIN.
 378     for (int32_t i=0; i<fCount; ++i)
 379     {
 380         char dig = fDigits[i],
 381              max = I64_MIN_REP[i];
 382         if (dig > max)
 383             return FALSE;
 384         if (dig < max)
 385             return TRUE;
 386     }
 387
 388     // At this point the first count digits match.  If fDecimalAt is less
 389     // than count, then the remaining digits are zero, and we return true.
 390     if (fCount < fDecimalAt)
 391         return TRUE;
 392
 393     // Now we have a representation of INT64_MIN_VALUE, without the leading
 394     // negative sign.  If this represents a positive value, then it does
 395     // not fit; otherwise it fits.
 396     return !fIsPositive;
 397 }
 398
 399
 400 // -------------------------------------
 401
 402 void
 403 DigitList::set(int32_t source, int32_t maximumDigits)
 404 {
 405     set((int64_t)source, maximumDigits);
 406 }
 407
 408 // -------------------------------------
 409 /**
 410  * @param maximumDigits The maximum digits to be generated.  If zero,
 411  * there is no maximum -- generate all digits.
 412  */
 413 void
 414 DigitList::set(int64_t source, int32_t maximumDigits)
 415 {
 416     fCount = fDecimalAt = formatBase10(source, fDecimalDigits, MAX_DIGITS);
 417
 418     fIsPositive = (*fDecimalDigits == '+');
 419
 420     // Don't copy trailing zeros
 421     while (fCount > 1 && fDigits[fCount - 1] == kZero)
 422         --fCount;
 423
 424     if(maximumDigits > 0)
 425         round(maximumDigits);
 426 }
 427
 428 /**
 429  * Set the digit list to a representation of the given double value.
 430  * This method supports both fixed-point and exponential notation.
 431  * @param source Value to be converted; must not be Inf, -Inf, Nan,
 432  * or a value <= 0.
 433  * @param maximumDigits The most fractional or total digits which should
 434  * be converted.  If total digits, and the value is zero, then
 435  * there is no maximum -- generate all digits.
 436  * @param fixedPoint If true, then maximumDigits is the maximum
 437  * fractional digits to be converted.  If false, total digits.
 438  */
 439 void
 440 DigitList::set(double source, int32_t maximumDigits, UBool fixedPoint)
 441 {
 442     // for now, simple implementation; later, do proper IEEE stuff
 443     char rep[MAX_DIGITS + 8]; // Extra space for '+', '.', e+NNN, and '\0' (actually +8 is enough)
 444     char *digitPtr      = fDigits;
 445     char *repPtr        = rep + 2;  // +2 to skip the sign and decimal
 446     int32_t exponent    = 0;
 447
 448     fIsPositive = !uprv_isNegative(source);    // Allow +0 and -0
 449
 450     // Generate a representation of the form /[+-][0-9]+e[+-][0-9]+/
 451     sprintf(rep, "%+1.*e", MAX_DBL_DIGITS - 1, source);
 452     fDecimalAt  = 0;
 453     rep[2]      = rep[1];    // remove decimal
 454
 455     while (*repPtr == kZero) {
 456         repPtr++;
 457         fDecimalAt--;   // account for leading zeros
 458     }
 459
 460     while (*repPtr != 'e') {
 461         *(digitPtr++) = *(repPtr++);
 462     }
 463     fCount = MAX_DBL_DIGITS + fDecimalAt;
 464
 465     // Parse an exponent of the form /[eE][+-][0-9]+/
 466     UBool negExp = (*(++repPtr) == '-');
 467     while (*(++repPtr) != 0) {
 468         exponent = 10*exponent + *repPtr - kZero;
 469     }
 470     if (negExp) {
 471         exponent = -exponent;
 472     }
 473     fDecimalAt += exponent + 1; // +1 for decimal removal
 474
 475     // The negative of the exponent represents the number of leading
 476     // zeros between the decimal and the first non-zero digit, for
 477     // a value < 0.1 (e.g., for 0.00123, -decimalAt == 2).  If this
 478     // is more than the maximum fraction digits, then we have an underflow
 479     // for the printed representation.
 480     if (fixedPoint && -fDecimalAt >= maximumDigits)
 481     {
 482         // If we round 0.0009 to 3 fractional digits, then we have to
 483         // create a new one digit in the least significant location.
 484         if (-fDecimalAt == maximumDigits && shouldRoundUp(0)) {
 485             fCount = 1;
 486             ++fDecimalAt;
 487             fDigits[0] = (char)'1';
 488         } else {
 489             // Handle an underflow to zero when we round something like
 490             // 0.0009 to 2 fractional digits.
 491             fCount = 0;
 492         }
 493         return;
 494     }
 495
 496
 497     // Eliminate digits beyond maximum digits to be displayed.
 498     // Round up if appropriate.  Do NOT round in the special
 499     // case where maximumDigits == 0 and fixedPoint is FALSE.
 500     if (fixedPoint || (0 < maximumDigits && maximumDigits < fCount)) {
 501         round(fixedPoint ? (maximumDigits + fDecimalAt) : maximumDigits);
 502     }
 503     else {
 504         // Eliminate trailing zeros.
 505         while (fCount > 1 && fDigits[fCount - 1] == kZero)
 506             --fCount;
 507     }
 508 }
 509
 510 // -------------------------------------
 511
 512 /**
 513  * Round the representation to the given number of digits.
 514  * @param maximumDigits The maximum number of digits to be shown.
 515  * Upon return, count will be less than or equal to maximumDigits.
 516  */
 517 void
 518 DigitList::round(int32_t maximumDigits)
 519 {
 520     // Eliminate digits beyond maximum digits to be displayed.
 521     // Round up if appropriate.
 522     if (maximumDigits >= 0 && maximumDigits < fCount)
 523     {
 524         if (shouldRoundUp(maximumDigits)) {
 525             // Rounding up involved incrementing digits from LSD to MSD.
 526             // In most cases this is simple, but in a worst case situation
 527             // (9999..99) we have to adjust the decimalAt value.
 528             while (--maximumDigits >= 0 && ++fDigits[maximumDigits] > '9')
 529                 ;
 530
 531             if (maximumDigits < 0)
 532             {
 533                 // We have all 9's, so we increment to a single digit
 534                 // of one and adjust the exponent.
 535                 fDigits[0] = (char) '1';
 536                 ++fDecimalAt;
 537                 maximumDigits = 1; // Adjust the count
 538             }
 539             else
 540             {
 541                 ++maximumDigits; // Increment for use as count
 542             }
 543         }
 544         fCount = maximumDigits;
 545     }
 546
 547     // Eliminate trailing zeros.
 548     while (fCount > 1 && fDigits[fCount-1] == kZero) {
 549         --fCount;
 550     }
 551 }
 552
 553 /**
 554  * Return true if truncating the representation to the given number
 555  * of digits will result in an increment to the last digit.  This
 556  * method implements the requested rounding mode.
 557  * [bnf]
 558  * @param maximumDigits the number of digits to keep, from 0 to
 559  * <code>count-1</code>.  If 0, then all digits are rounded away, and
 560  * this method returns true if a one should be generated (e.g., formatting
 561  * 0.09 with "#.#").
 562  * @return true if digit <code>maximumDigits-1</code> should be
 563  * incremented
 564  */
 565 UBool DigitList::shouldRoundUp(int32_t maximumDigits) const {
 566     switch (fRoundingMode) {
 567     case DecimalFormat::kRoundCeiling:
 568         return fIsPositive;
 569     case DecimalFormat::kRoundFloor:
 570         return !fIsPositive;
 571     case DecimalFormat::kRoundDown:
 572         return FALSE;
 573     case DecimalFormat::kRoundUp:
 574         return TRUE;
 575     case DecimalFormat::kRoundHalfEven:
 576     case DecimalFormat::kRoundHalfDown:
 577     case DecimalFormat::kRoundHalfUp:
 578     default:
 579         if (fDigits[maximumDigits] == '5' ) {
 580             for (int i=maximumDigits+1; i<fCount; ++i) {
 581                 if (fDigits[i] != kZero) {
 582                     return TRUE;
 583                 }
 584             }
 585             switch (fRoundingMode) {
 586             case DecimalFormat::kRoundHalfEven:
 587             default:
 588                 // Implement IEEE half-even rounding
 589                 return maximumDigits > 0 && (fDigits[maximumDigits-1] % 2 != 0);
 590             case DecimalFormat::kRoundHalfDown:
 591                 return FALSE;
 592             case DecimalFormat::kRoundHalfUp:
 593                 return TRUE;
 594             }
 595         }
 596         return (fDigits[maximumDigits] > '5');
 597     }
 598 }
 599
 600 // -------------------------------------
 601
 602 // In the Java implementation, we need a separate set(long) because 64-bit longs
 603 // have too much precision to fit into a 64-bit double.  In C++, longs can just
 604 // be passed to set(double) as long as they are 32 bits in size.  We currently
 605 // don't implement 64-bit longs in C++, although the code below would work for
 606 // that with slight modifications. [LIU]
 607 /*
 608 void
 609 DigitList::set(long source)
 610 {
 611     // handle the special case of zero using a standard exponent of 0.
 612     // mathematically, the exponent can be any value.
 613     if (source == 0)
 614     {
 615         fcount = 0;
 616         fDecimalAt = 0;
 617         return;
 618     }
 619
 620     // we don't accept negative numbers, with the exception of long_min.
 621     // long_min is treated specially by being represented as long_max+1,
 622     // which is actually an impossible signed long value, so there is no
 623     // ambiguity.  we do this for convenience, so digitlist can easily
 624     // represent the digits of a long.
 625     bool islongmin = (source == long_min);
 626     if (islongmin)
 627     {
 628         source = -(source + 1); // that is, long_max
 629         islongmin = true;
 630     }
 631     sprintf(fdigits, "%d", source);
 632
 633     // now we need to compute the exponent.  it's easy in this case; it's
 634     // just the same as the count.  e.g., 0.123 * 10^3 = 123.
 635     fcount = strlen(fdigits);
 636     fDecimalAt = fcount;
 637
 638     // here's how we represent long_max + 1.  note that we always know
 639     // that the last digit of long_max will not be 9, because long_max
 640     // is of the form (2^n)-1.
 641     if (islongmin)
 642         ++fdigits[fcount-1];
 643
 644     // finally, we trim off trailing zeros.  we don't alter fDecimalAt,
 645     // so this has no effect on the represented value.  we know the first
 646     // digit is non-zero (see code above), so we only have to check down
 647     // to fdigits[1].
 648     while (fcount > 1 && fdigits[fcount-1] == kzero)
 649         --fcount;
 650 }
 651 */
 652
 653 /**
 654  * Return true if this object represents the value zero.  Anything with
 655  * no digits, or all zero digits, is zero, regardless of fDecimalAt.
 656  */
 657 UBool
 658 DigitList::isZero() const
 659 {
 660     for (int32_t i=0; i<fCount; ++i)
 661         if (fDigits[i] != kZero)
 662             return FALSE;
 663     return TRUE;
 664 }
 665
 666 U_NAMESPACE_END
 667
 668 //eof