ICU-6.2.4.tar.gz

[apple/icu.git] / icuSources / i18n / digitlst.cpp
diff --git a/icuSources/i18n/digitlst.cpp b/icuSources/i18n/digitlst.cpp

new file mode 100644 (file)

index 0000000..2ec5a14
--- /dev/null
+++ b/icuSources/i18n/digitlst.cpp
@@ -0,0 +1,643 @@
+/*
+**********************************************************************
+*   Copyright (C) 1997-2004, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*
+* File DIGITLST.CPP
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   03/21/97    clhuang     Converted from java.
+*   03/21/97    clhuang     Implemented with new APIs.
+*   03/27/97    helena      Updated to pass the simple test after code review.
+*   03/31/97    aliu        Moved isLONG_MIN to here, and fixed it.
+*   04/15/97    aliu        Changed MAX_COUNT to DBL_DIG.  Changed Digit to char.
+*                           Reworked representation by replacing fDecimalAt
+*                           with fExponent.
+*   04/16/97    aliu        Rewrote set() and getDouble() to use sprintf/atof
+*                           to do digit conversion.
+*   09/09/97    aliu        Modified for exponential notation support.
+*   08/02/98    stephen     Added nearest/even rounding
+*                            Fixed bug in fitsIntoLong
+******************************************************************************
+*/
+
+#include "unicode/putil.h"
+#include "digitlst.h"
+#include "cstring.h"
+#include "putilimp.h"
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+
+// ***************************************************************************
+// class DigitList
+// This class handles the transcoding between numeric values and strings of
+//  characters.  The digit array holds only a non-negative magnitude; the
+//  sign is tracked separately in fIsPositive.
+// ***************************************************************************
+
+/**
+ * This is the zero digit.  Array elements fDigits[i] have values from
+ * kZero to kZero + 9.  Typically, this is '0'.
+ */
+#define kZero '0'
+
+static char gDecimal = 0;   // decimal separator emitted by sprintf(); stays 0 until getDouble() probes it
+
+/* Decimal text of the magnitude of the most negative 32-bit and 64-bit
+ * signed values, written without the negative sign.  fitsIntoLong() and
+ * fitsIntoInt64() compare stored digits against these strings. */
+static const char LONG_MIN_REP[] = "2147483648";
+static const char I64_MIN_REP[] = "9223372036854775808";
+
+static const int64_t I64_MIN_VALUE = U_INT64_MIN;   // NOTE(review): not referenced by the code visible in this file
+
+enum {
+    LONG_MIN_REP_LENGTH = sizeof(LONG_MIN_REP) - 1, // digit count of LONG_MIN_REP; ignore the NULL at the end
+    I64_MIN_REP_LENGTH = sizeof(I64_MIN_REP) - 1 // digit count of I64_MIN_REP; ignore the NULL at the end
+};
+
+U_NAMESPACE_BEGIN
+
+
+// -------------------------------------
+// Default constructor: yields an empty, positive digit list.
+
+DigitList::DigitList()
+{
+    // Element 0 of fDecimalDigits is reserved for a sign or decimal
+    // separator character; the digits proper start one char later.
+    fDigits = &fDecimalDigits[1];
+    clear();
+}
+
+/* -------------------------------------
+ * Destructor.  The body is empty: the constructors in this file only point
+ * fDigits into the object's own fDecimalDigits buffer, and no cleanup code
+ * is needed for that.
+ */
+
+DigitList::~DigitList()
+{
+}
+
+// -------------------------------------
+// Copy constructor: duplicates the state of another digit list.
+
+DigitList::DigitList(const DigitList &other)
+{
+    // Aim fDigits at this object's own buffer first, so that the
+    // assignment below copies the digits into our storage.
+    fDigits = &fDecimalDigits[1];
+    operator=(other);
+}
+
+// -------------------------------------
+// Assignment operator: copies decimal position, count, sign and live digits.
+
+DigitList&
+DigitList::operator=(const DigitList& other)
+{
+    // Self-assignment leaves the object untouched.
+    if (this == &other) {
+        return *this;
+    }
+
+    fIsPositive = other.fIsPositive;
+    fDecimalAt  = other.fDecimalAt;
+    fCount      = other.fCount;
+
+    // Only the first fCount characters are meaningful.  fDigits itself is
+    // not reassigned; it keeps pointing at this object's buffer.
+    uprv_strncpy(fDigits, other.fDigits, fCount);
+    return *this;
+}
+
+// -------------------------------------
+// Equality: same object, or same decimal position, count, sign and digits.
+
+UBool
+DigitList::operator==(const DigitList& that) const
+{
+    if (this == &that) {
+        return TRUE;
+    }
+
+    // Cheap scalar fields first; any mismatch settles the answer.
+    if (fDecimalAt != that.fDecimalAt ||
+        fCount != that.fCount ||
+        fIsPositive != that.fIsPositive) {
+        return FALSE;
+    }
+
+    // Only the first fCount characters take part in the comparison.
+    return uprv_strncmp(fDigits, that.fDigits, fCount) == 0;
+}
+
+// -------------------------------------
+// Resets the digit list to an empty, positive value.
+
+void
+DigitList::clear()
+{
+    // With a count of zero no element of fDigits is live, so the buffer
+    // contents are simply left as they are.
+    fCount = 0;
+    fDecimalAt = 0;
+    fIsPositive = TRUE;
+}
+
+
+
+// -------------------------------------
+
+/**
+ * Formats a number into a base 10 string representation, and NULL terminates it.
+ * The text written is a sign character ('-' for negative input, '+' otherwise)
+ * followed by the decimal digits; zero produces just "+" with no digits.
+ * @param number The number to format
+ * @param outputStr The string to output to
+ * @param outputLen Size hint for the digit field.  Values above MAX_DIGITS
+ *                  are clamped to MAX_DIGITS; values below 3 make the
+ *                  function return 0 without writing anything (not even a
+ *                  terminator).  When the number has more digits than fit,
+ *                  the most significant ones are dropped.
+ *                  NOTE(review): the scratch buffer holds up to
+ *                  outputLen + 1 digits, so together with the sign and the
+ *                  NULL up to outputLen + 3 chars can be stored in
+ *                  outputStr -- confirm callers leave that much room.
+ * @return the number of digits written, not including the sign.
+ */
+static int32_t
+formatBase10(int64_t number, char *outputStr, int32_t outputLen) 
+{
+    char buffer[MAX_DIGITS + 1];
+    int32_t bufferLen;          // index of the next free slot; buffer is filled from the right
+    int32_t result;
+
+    if (outputLen > MAX_DIGITS) {
+        outputLen = MAX_DIGITS;     // Ignore NULL
+    }
+    else if (outputLen < 3) {
+        return 0;                   // Not enough room
+    }
+
+    bufferLen = outputLen;
+
+    if (number < 0) {   // Negative numbers are slightly larger than a positive
+        buffer[bufferLen--] = (char)(-(number % 10) + kZero);  // peel off the last digit before changing sign,
+        number /= -10;                                         // so the most negative value is never negated directly
+        *(outputStr++) = '-';
+    }
+    else {
+        *(outputStr++) = '+';    // allow +0
+    }
+    while (bufferLen >= 0 && number) {      // Output the number
+        buffer[bufferLen--] = (char)(number % 10 + kZero);
+        number /= 10;
+    }
+
+    result = outputLen - bufferLen++;   // digit count; bufferLen now indexes the first digit
+
+    while (bufferLen <= outputLen) {     // Copy the number to output
+        *(outputStr++) = buffer[bufferLen++];
+    }
+    *outputStr = 0;   // NULL terminate.
+    return result;
+}
+
+/**
+ * Currently, getDouble() depends on atof() to do its conversion.
+ *
+ * WARNING!!
+ * This is an extremely costly function. ~1/2 of the conversion time
+ * can be linked to this function.
+ */
+double
+DigitList::getDouble() /*const*/
+{
+    double value;
+
+    if (fCount == 0) {
+        value = 0.0;
+    }
+    else {
+        char* end = NULL;
+        if (!gDecimal) {
+            char rep[MAX_DIGITS];
+            // For machines that decide to change the decimal on you,
+            // and try to be too smart with localization.
+            // This normally should be just a '.'.
+            sprintf(rep, "%+1.1f", 1.0);
+            gDecimal = rep[2];
+        }
+
+        *fDecimalDigits = gDecimal;
+        *(fDigits+fCount) = 'e';    // add an e after the digits.
+        formatBase10(fDecimalAt,
+                     fDigits + fCount + 1,  // skip the 'e'
+                     MAX_DEC_DIGITS - fCount - 3);  // skip the 'e' and '.'
+        value = uprv_strtod(fDecimalDigits, &end);
+    }
+
+    return fIsPositive ? value : -value;
+}
+
+// -------------------------------------
+
+/**
+ * Make sure that fitsIntoLong() is called before calling this function.
+ */
+int32_t DigitList::getLong() /*const*/
+{
+    if (fCount == fDecimalAt) {
+        int32_t value;
+
+        fDigits[fCount] = 0;    // NULL terminate
+
+        // This conversion is bad on 64-bit platforms when we want to
+        // be able to return a 64-bit number [grhoten]
+        *fDecimalDigits = fIsPositive ? '+' : '-';
+        value = (int32_t)atol(fDecimalDigits);
+        return value;
+    }
+    else {
+        // This is 100% accurate in c++ because if we are representing
+        // an integral value, we suffer nothing in the conversion to
+        // double.  If we are to support 64-bit longs later, getLong()
+        // must be rewritten. [LIU]
+        return (int32_t)getDouble();
+    }
+}
+
+
+/**
+ * Make sure that fitsIntoInt64() is called before calling this function.
+ */
+int64_t DigitList::getInt64() /*const*/
+{
+    if (fCount == fDecimalAt) {
+        uint64_t value;
+
+        fDigits[fCount] = 0;    // NULL terminate
+
+        // This conversion is bad on 64-bit platforms when we want to
+        // be able to return a 64-bit number [grhoten]
+        *fDecimalDigits = fIsPositive ? '+' : '-';
+
+        if (fCount < LONG_MIN_REP_LENGTH) {
+            return (int64_t)atol(fDecimalDigits);
+        }
+
+        // too big for atol, hand-roll atoi64
+        value = 0;
+        for (int i = 0; i < fCount; ++i) {
+            int v = fDigits[i] - kZero;
+            value = value * (uint64_t)10 + (uint64_t)v;
+        }
+        if (!fIsPositive) {
+            value = ~value;
+            value += 1;
+        }
+        int64_t svalue = (int64_t)value;
+        return svalue;
+    }
+    else {
+        // todo: figure out best approach
+
+        // This is 100% accurate in c++ because if we are representing
+        // an integral value, we suffer nothing in the conversion to
+        // double.  If we are to support 64-bit longs later, getLong()
+        // must be rewritten. [LIU]
+        return (int64_t)getDouble();
+    }
+}
+
+/**
+ * Return true if the number represented by this object can fit into
+ * a long.
+ */
+UBool
+DigitList::fitsIntoLong(UBool ignoreNegativeZero) /*const*/
+{
+    // Figure out if the result will fit in a long.  We have to
+    // first look for nonzero digits after the decimal point;
+    // then check the size.
+
+    // Trim trailing zeros after the decimal point. This does not change
+    // the represented value.
+    while (fCount > fDecimalAt && fCount > 0 && fDigits[fCount - 1] == kZero)
+        --fCount;
+
+    if (fCount == 0) {
+        // Positive zero fits into a long, but negative zero can only
+        // be represented as a double. - bug 4162852
+        return fIsPositive || ignoreNegativeZero;
+    }
+
+    // If the digit list represents a double or this number is too
+    // big for a long.
+    if (fDecimalAt < fCount || fDecimalAt > LONG_MIN_REP_LENGTH)
+        return FALSE;
+
+    // If number is small enough to fit in a long
+    if (fDecimalAt < LONG_MIN_REP_LENGTH)
+        return TRUE;
+
+    // At this point we have fDecimalAt == fCount, and fCount == LONG_MIN_REP_LENGTH.
+    // The number will overflow if it is larger than LONG_MAX
+    // or smaller than LONG_MIN.
+    for (int32_t i=0; i<fCount; ++i)
+    {
+        char dig = fDigits[i],
+             max = LONG_MIN_REP[i];
+        if (dig > max)
+            return FALSE;
+        if (dig < max)
+            return TRUE;
+    }
+
+    // At this point the first count digits match.  If fDecimalAt is less
+    // than count, then the remaining digits are zero, and we return true.
+    if (fCount < fDecimalAt)
+        return TRUE;
+
+    // Now we have a representation of Long.MIN_VALUE, without the leading
+    // negative sign.  If this represents a positive value, then it does
+    // not fit; otherwise it fits.
+    return !fIsPositive;
+}
+
+/**
+ * Return true if the number represented by this object can fit into
+ * a long.
+ */
+UBool
+DigitList::fitsIntoInt64(UBool ignoreNegativeZero) /*const*/
+{
+    // Figure out if the result will fit in a long.  We have to
+    // first look for nonzero digits after the decimal point;
+    // then check the size.
+
+    // Trim trailing zeros after the decimal point. This does not change
+    // the represented value.
+    while (fCount > fDecimalAt && fCount > 0 && fDigits[fCount - 1] == kZero)
+        --fCount;
+
+    if (fCount == 0) {
+        // Positive zero fits into a long, but negative zero can only
+        // be represented as a double. - bug 4162852
+        return fIsPositive || ignoreNegativeZero;
+    }
+
+    // If the digit list represents a double or this number is too
+    // big for a long.
+    if (fDecimalAt < fCount || fDecimalAt > I64_MIN_REP_LENGTH)
+        return FALSE;
+
+    // If number is small enough to fit in an int64
+    if (fDecimalAt < I64_MIN_REP_LENGTH)
+        return TRUE;
+
+    // At this point we have fDecimalAt == fCount, and fCount == INT64_MIN_REP_LENGTH.
+    // The number will overflow if it is larger than U_INT64_MAX
+    // or smaller than U_INT64_MIN.
+    for (int32_t i=0; i<fCount; ++i)
+    {
+        char dig = fDigits[i],
+             max = I64_MIN_REP[i];
+        if (dig > max)
+            return FALSE;
+        if (dig < max)
+            return TRUE;
+    }
+
+    // At this point the first count digits match.  If fDecimalAt is less
+    // than count, then the remaining digits are zero, and we return true.
+    if (fCount < fDecimalAt)
+        return TRUE;
+
+    // Now we have a representation of INT64_MIN_VALUE, without the leading
+    // negative sign.  If this represents a positive value, then it does
+    // not fit; otherwise it fits.
+    return !fIsPositive;
+}
+
+
+// -------------------------------------
+// 32-bit convenience overload: widens the value and forwards to the
+// int64_t overload.
+
+void
+DigitList::set(int32_t source, int32_t maximumDigits)
+{
+    const int64_t widened = source;
+    set(widened, maximumDigits);
+}
+
+// -------------------------------------
+/**
+ * Sets the digit list to the given integer value.
+ * @param source The integer to represent.
+ * @param maximumDigits The maximum digits to be generated.  If zero,
+ * there is no maximum -- generate all digits.
+ */
+void
+DigitList::set(int64_t source, int32_t maximumDigits)
+{
+    // formatBase10() writes "<sign><digits>" starting at fDecimalDigits[0].
+    // For an integer the decimal point sits right after the last digit,
+    // so count and decimal position start out equal.
+    const int32_t digitCount = formatBase10(source, fDecimalDigits, MAX_DIGITS);
+    fDecimalAt = digitCount;
+    fCount = digitCount;
+
+    fIsPositive = (fDecimalDigits[0] == '+');
+
+    // Don't keep trailing zeros; fDecimalAt still accounts for them.
+    for (; fCount > 1 && fDigits[fCount - 1] == kZero; --fCount) {
+    }
+
+    if (maximumDigits > 0) {
+        round(maximumDigits);
+    }
+}
+
+/**
+ * Set the digit list to a representation of the given double value.
+ * This method supports both fixed-point and exponential notation.
+ * The value is printed with sprintf() in "%e" form using MAX_DBL_DIGITS
+ * significant digits; the digits and the exponent are then read back out
+ * of that text.
+ * @param source Value to be converted; must not be Inf, -Inf, or Nan.
+ * NOTE(review): the digit-copy loop below only stops at an 'e', which
+ * presumably does not appear in the text printed for Inf/NaN -- verify.
+ * NOTE(review): earlier documentation also excluded values <= 0, but the
+ * body records the sign in fIsPositive and starts reading after the sign
+ * character, so zero and negative finite values appear to be handled --
+ * confirm.
+ * @param maximumDigits The most fractional or total digits which should
+ * be converted.  If total digits, and the value is zero, then
+ * there is no maximum -- generate all digits.
+ * @param fixedPoint If true, then maximumDigits is the maximum
+ * fractional digits to be converted.  If false, total digits.
+ */
+void
+DigitList::set(double source, int32_t maximumDigits, UBool fixedPoint)
+{
+    // for now, simple implementation; later, do proper IEEE stuff
+    char rep[MAX_DIGITS + 8]; // Extra space for '+', '.', e+NNN, and '\0' (actually +8 is enough)
+    char *digitPtr      = fDigits;
+    char *repPtr        = rep + 2;  // +2 to skip the sign and decimal
+    int32_t exponent    = 0;
+
+    fIsPositive = !uprv_isNegative(source);    // Allow +0 and -0
+
+    // Generate a representation of the form /[+-][0-9].[0-9]+e[+-][0-9]+/
+    sprintf(rep, "%+1.*e", MAX_DBL_DIGITS - 1, source);
+    fDecimalAt  = 0;
+    rep[2]      = rep[1];    // remove decimal: copy the leading digit over the separator
+
+    while (*repPtr == kZero) {
+        repPtr++;
+        fDecimalAt--;   // account for leading zeros
+    }
+
+    while (*repPtr != 'e') {    // copy the remaining mantissa digits into fDigits
+        *(digitPtr++) = *(repPtr++);
+    }
+    fCount = MAX_DBL_DIGITS + fDecimalAt;   // digits printed minus the leading zeros skipped above
+
+    // Parse an exponent of the form /[eE][+-][0-9]+/
+    UBool negExp = (*(++repPtr) == '-');
+    while (*(++repPtr) != 0) {
+        exponent = 10*exponent + *repPtr - kZero;
+    }
+    if (negExp) {
+        exponent = -exponent;
+    }
+    fDecimalAt += exponent + 1; // +1 for decimal removal
+
+    // The negative of the exponent represents the number of leading
+    // zeros between the decimal and the first non-zero digit, for
+    // a value < 0.1 (e.g., for 0.00123, -decimalAt == 2).  If this
+    // is more than the maximum fraction digits, then we have an underflow
+    // for the printed representation.
+    if (fixedPoint && -fDecimalAt >= maximumDigits)
+    {
+        // If we round 0.0009 to 3 fractional digits, then we have to
+        // create a new one digit in the least significant location.
+        if (-fDecimalAt == maximumDigits && shouldRoundUp(0)) {
+            fCount = 1;
+            ++fDecimalAt;
+            fDigits[0] = (char)'1';
+        } else {
+            // Handle an underflow to zero when we round something like
+            // 0.0009 to 2 fractional digits.
+            fCount = 0;
+        }
+        return;
+    }
+
+
+    // Eliminate digits beyond maximum digits to be displayed.
+    // Round up if appropriate.  Do NOT round in the special
+    // case where maximumDigits == 0 and fixedPoint is FALSE.
+    if (fixedPoint || (0 < maximumDigits && maximumDigits < fCount)) {
+        round(fixedPoint ? (maximumDigits + fDecimalAt) : maximumDigits);   // fixed point: fraction digits plus integer digits
+    }
+    else {
+        // Eliminate trailing zeros.
+        while (fCount > 1 && fDigits[fCount - 1] == kZero)
+            --fCount;
+    }
+}
+
+// -------------------------------------
+
+/**
+ * Round the representation to the given number of digits.
+ * @param maximumDigits The maximum number of digits to be shown.
+ * Upon return, count will be less than or equal to maximumDigits.
+ */
+void 
+DigitList::round(int32_t maximumDigits)
+{
+    // Eliminate digits beyond maximum digits to be displayed.
+    // Round up if appropriate.
+    if (maximumDigits >= 0 && maximumDigits < fCount)
+    {
+        if (shouldRoundUp(maximumDigits)) {
+            // Rounding up involved incrementing digits from LSD to MSD.
+            // In most cases this is simple, but in a worst case situation
+            // (9999..99) we have to adjust the decimalAt value.
+            while (--maximumDigits >= 0 && ++fDigits[maximumDigits] > '9')
+                ;
+
+            if (maximumDigits < 0)
+            {
+                // We have all 9's, so we increment to a single digit
+                // of one and adjust the exponent.
+                fDigits[0] = (char) '1';
+                ++fDecimalAt;
+                maximumDigits = 1; // Adjust the count
+            }
+            else
+            {
+                ++maximumDigits; // Increment for use as count
+            }
+        }
+        fCount = maximumDigits;
+    }
+
+    // Eliminate trailing zeros.
+    while (fCount > 1 && fDigits[fCount-1] == kZero) {
+        --fCount;
+    }
+}
+
+/**
+ * Return true if truncating the representation to the given number
+ * of digits will result in an increment to the last digit.  This
+ * method implements half-even rounding, the default rounding mode.
+ * [bnf]
+ * @param maximumDigits the number of digits to keep, from 0 to
+ * <code>count-1</code>.  If 0, then all digits are rounded away, and
+ * this method returns true if a one should be generated (e.g., formatting
+ * 0.09 with "#.#").
+ * @return true if digit <code>maximumDigits-1</code> should be
+ * incremented
+ */
+UBool DigitList::shouldRoundUp(int32_t maximumDigits) const {
+    // Implement IEEE half-even rounding
+    if (fDigits[maximumDigits] == '5' ) {
+        for (int i=maximumDigits+1; i<fCount; ++i) {
+            if (fDigits[i] != kZero) {
+                return TRUE;
+            }
+        }
+        return maximumDigits > 0 && (fDigits[maximumDigits-1] % 2 != 0);
+    }
+    return (fDigits[maximumDigits] > '5');
+}
+
+// -------------------------------------
+
+// In the Java implementation, we need a separate set(long) because 64-bit longs
+// have too much precision to fit into a 64-bit double.  In C++, longs can just
+// be passed to set(double) as long as they are 32 bits in size.  We currently
+// don't implement 64-bit longs in C++, although the code below would work for
+// that with slight modifications. [LIU]
+/*
+void
+DigitList::set(long source)
+{
+    // handle the special case of zero using a standard exponent of 0.
+    // mathematically, the exponent can be any value.
+    if (source == 0)
+    {
+        fcount = 0;
+        fDecimalAt = 0;
+        return;
+    }
+
+    // we don't accept negative numbers, with the exception of long_min.
+    // long_min is treated specially by being represented as long_max+1,
+    // which is actually an impossible signed long value, so there is no
+    // ambiguity.  we do this for convenience, so digitlist can easily
+    // represent the digits of a long.
+    bool islongmin = (source == long_min);
+    if (islongmin)
+    {
+        source = -(source + 1); // that is, long_max
+        islongmin = true;
+    }
+    sprintf(fdigits, "%d", source);
+
+    // now we need to compute the exponent.  it's easy in this case; it's
+    // just the same as the count.  e.g., 0.123 * 10^3 = 123.
+    fcount = strlen(fdigits);
+    fDecimalAt = fcount;
+
+    // here's how we represent long_max + 1.  note that we always know
+    // that the last digit of long_max will not be 9, because long_max
+    // is of the form (2^n)-1.
+    if (islongmin)
+        ++fdigits[fcount-1];
+
+    // finally, we trim off trailing zeros.  we don't alter fDecimalAt,
+    // so this has no effect on the represented value.  we know the first
+    // digit is non-zero (see code above), so we only have to check down
+    // to fdigits[1].
+    while (fcount > 1 && fdigits[fcount-1] == kzero)
+        --fcount;
+}
+*/
+
+/**
+ * Return true if this object represents the value zero.  A list with no
+ * digits, or with nothing but zero digits, is zero no matter what
+ * fDecimalAt says.
+ */
+UBool
+DigitList::isZero() const
+{
+    // Skip over leading zero digits; the value is zero exactly when the
+    // scan reaches the end of the live digits.
+    int32_t i = 0;
+    while (i < fCount && fDigits[i] == kZero) {
+        ++i;
+    }
+    return i >= fCount;
+}
+
+U_NAMESPACE_END
+
+//eof