[apple/icu.git] / icuSources / i18n / affixpatternparser.cpp

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 * Copyright (C) 2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *
 * file name: affixpatternparser.cpp
 */

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING

#include "unicode/dcfmtsym.h"
#include "unicode/plurrule.h"
#include "unicode/strenum.h"
#include "unicode/ucurr.h"
#include "unicode/ustring.h"
#include "affixpatternparser.h"
#include "charstr.h"
#include "precision.h"
#include "uassert.h"
#include "unistrappender.h"

        // Three generic currency signs (U+00A4). CurrencyAffixInfo uses the first
        // 1, 2 and 3 of them as the placeholder symbol, ISO code and long name.
        static UChar gDefaultSymbols[] = {0xa4, 0xa4, 0xa4};

// Symbols used by the default-constructed AffixPatternParser.
static UChar gPercent = 0x25;    // '%'
static UChar gPerMill = 0x2030;  // per mille sign
static UChar gNegative = 0x2D;   // '-'
static UChar gPositive = 0x2B;   // '+'

// Token encoding: each token is one UChar. Bits 8-14 hold the token type,
// bit 15 marks a continuation unit of a multi-unit literal length, and
// bits 0-7 hold one length byte.

// Packs token type t (optionally OR-ed with 0x80 for a continuation unit) and
// the low byte of l into one UChar. Both arguments are fully parenthesized so
// that operator expressions such as (a | b) are masked as a whole.
#define PACK_TOKEN_AND_LENGTH(t, l) ((UChar) (((t) << 8) | ((l) & 0xFF)))

// Extracts the token type of c, ignoring the continuation bit.
#define UNPACK_TOKEN(c) ((AffixPattern::ETokenType) (((c) >> 8) & 0x7F))

// Non-zero if c is a continuation unit of a literal length.
#define UNPACK_LONG(c) (((c) >> 8) & 0x80)

// Extracts the length byte of c.
#define UNPACK_LENGTH(c) ((c) & 0xFF)

U_NAMESPACE_BEGIN

static int32_t
nextToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) {
    if (buffer[idx] != 0x27 || idx + 1 == len) {
        *token = buffer[idx];
        return 1;
    }
    *token = buffer[idx + 1];
    if (buffer[idx + 1] == 0xA4) {
        int32_t i = 2;
        for (; idx + i < len && i < 4 && buffer[idx + i] == buffer[idx + 1]; ++i)
          ;
        return i;
    }
    return 2;
}

static int32_t
nextUserToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) {
    *token = buffer[idx];
    int32_t max;
    switch (buffer[idx]) {
    case 0x27:
        max = 2;
        break;
    case 0xA4:
        max = 3;
        break;
    default:
        max = 1;
        break;
    }
    int32_t i = 1;
    for (; idx + i < len && i < max && buffer[idx + i] == buffer[idx]; ++i)
      ;
    return i;
}

// Default-constructs the info with placeholders built from gDefaultSymbols:
// one currency sign for the symbol, two for the ISO code and three for the
// long name. fIsDefault is TRUE, matching the state set() produces for a
// NULL currency.
CurrencyAffixInfo::CurrencyAffixInfo()
        : fSymbol(gDefaultSymbols, 1),
          fISO(gDefaultSymbols, 2),
          fLong(DigitAffix(gDefaultSymbols, 3)),
          fIsDefault(TRUE) {
}

void
CurrencyAffixInfo::set(
        const char *locale,
        const PluralRules *rules,
        const UChar *currency,
        UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }
    fIsDefault = FALSE;
    if (currency == NULL) {
        fSymbol.setTo(gDefaultSymbols, 1);
        fISO.setTo(gDefaultSymbols, 2);
        fLong.remove();
        fLong.append(gDefaultSymbols, 3);
        fIsDefault = TRUE;
        return;
    }
    int32_t len;
    UBool unusedIsChoice;
    const UChar *symbol = ucurr_getName(
            currency, locale, UCURR_SYMBOL_NAME, &unusedIsChoice,
            &len, &status);
    if (U_FAILURE(status)) {
        return;
    }
    fSymbol.setTo(symbol, len);
    fISO.setTo(currency, u_strlen(currency));
    fLong.remove();
    StringEnumeration* keywords = rules->getKeywords(status);
    if (U_FAILURE(status)) {
        return;
    }
    const UnicodeString* pluralCount;
    while ((pluralCount = keywords->snext(status)) != NULL) {
        CharString pCount;
        pCount.appendInvariantChars(*pluralCount, status);
        const UChar *pluralName = ucurr_getPluralName(
            currency, locale, &unusedIsChoice, pCount.data(),
            &len, &status);
        fLong.setVariant(pCount.data(), UnicodeString(pluralName, len), status);
    }
    delete keywords;
}

void
CurrencyAffixInfo::adjustPrecision(
        const UChar *currency, const UCurrencyUsage usage,
        FixedPrecision &precision, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }

    int32_t digitCount = ucurr_getDefaultFractionDigitsForUsage(
            currency, usage, &status);
    precision.fMin.setFracDigitCount(digitCount);
    precision.fMax.setFracDigitCount(digitCount);
    double increment = ucurr_getRoundingIncrementForUsage(
            currency, usage, &status);
    if (increment == 0.0) {
        precision.fRoundingIncrement.clear();
    } else {
        precision.fRoundingIncrement.set(increment);
        // guard against round-off error
        precision.fRoundingIncrement.round(6);
    }
}

void
AffixPattern::addLiteral(
        const UChar *literal, int32_t start, int32_t len) {
    char32Count += u_countChar32(literal + start, len);
    literals.append(literal, start, len);
    int32_t tlen = tokens.length();
    // Takes 4 UChars to encode maximum literal length.
    UChar *tokenChars = tokens.getBuffer(tlen + 4);

    // find start of literal size. May be tlen if there is no literal.
    // While finding start of literal size, compute literal length
    int32_t literalLength = 0;
    int32_t tLiteralStart = tlen;
    while (tLiteralStart > 0 && UNPACK_TOKEN(tokenChars[tLiteralStart - 1]) == kLiteral) {
        tLiteralStart--;
        literalLength <<= 8;
        literalLength |= UNPACK_LENGTH(tokenChars[tLiteralStart]);
    }
    // Add number of chars we just added to literal
    literalLength += len;

    // Now encode the new length starting at tLiteralStart
    tlen = tLiteralStart;
    tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral, literalLength & 0xFF);
    literalLength >>= 8;
    while (literalLength) {
        tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral | 0x80, literalLength & 0xFF);
        literalLength >>= 8;
    }
    tokens.releaseBuffer(tlen);
}

// Appends a single non-literal token of type t with a count of 1.
void
AffixPattern::add(ETokenType t) {
    add(t, 1);
}

// Appends a currency token; count is stored as the token's length
// (the number of consecutive currency signs it stands for).
void
AffixPattern::addCurrency(uint8_t count) {
    add(kCurrency, count);
}

// Appends one non-literal token of type t whose length byte is count.
//
// Also adds count to char32Count and records whether the pattern now
// contains a currency, percent or per-mille token. t must not be kLiteral;
// literals are added with addLiteral().
void
AffixPattern::add(ETokenType t, uint8_t count) {
    U_ASSERT(t != kLiteral);
    if (t == kCurrency) {
        hasCurrencyToken = TRUE;
    } else if (t == kPercent) {
        hasPercentToken = TRUE;
    } else if (t == kPerMill) {
        hasPermillToken = TRUE;
    }
    char32Count += count;
    tokens.append(PACK_TOKEN_AND_LENGTH(t, count));
}

// Appends every token of other to this pattern and returns *this.
//
// Literals are copied with addLiteral(), currency tokens keep their length,
// and every other token type is re-added with a count of 1.
AffixPattern &
AffixPattern::append(const AffixPattern &other) {
    AffixPatternIterator cursor;
    UnicodeString text;
    for (other.iterator(cursor); cursor.nextToken(); ) {
        const ETokenType kind = cursor.getTokenType();
        if (kind == kLiteral) {
            cursor.getLiteral(text);
            addLiteral(text.getBuffer(), 0, text.length());
        } else if (kind == kCurrency) {
            addCurrency(cursor.getTokenLength());
        } else {
            add(kind);
        }
    }
    return *this;
}

// Resets this pattern to the empty state: no tokens, no literal text,
// a code point count of zero and all "has token" flags cleared.
void
AffixPattern::remove() {
    char32Count = 0;
    hasPermillToken = FALSE;
    hasPercentToken = FALSE;
    hasCurrencyToken = FALSE;
    literals.remove();
    tokens.remove();
}

// escapes literals for strings where special characters are NOT escaped
// except for apostrophe.
static void escapeApostropheInLiteral(
        const UnicodeString &literal, UnicodeStringAppender &appender) {
    int32_t len = literal.length();
    const UChar *buffer = literal.getBuffer();
    for (int32_t i = 0; i < len; ++i) {
        UChar ch = buffer[i];
        switch (ch) {
            case 0x27:
                appender.append((UChar) 0x27);
                appender.append((UChar) 0x27);
                break;
            default:
                appender.append(ch);
                break;
        }
    }
}


// escapes literals for user strings where special characters in literals
// are escaped with apostrophe.
static void escapeLiteral(
        const UnicodeString &literal, UnicodeStringAppender &appender) {
    int32_t len = literal.length();
    const UChar *buffer = literal.getBuffer();
    for (int32_t i = 0; i < len; ++i) {
        UChar ch = buffer[i];
        switch (ch) {
            case 0x27:
                appender.append((UChar) 0x27);
                appender.append((UChar) 0x27);
                break;
            case 0x25:
                appender.append((UChar) 0x27);
                appender.append((UChar) 0x25);
                appender.append((UChar) 0x27);
                break;
            case 0x2030:
                appender.append((UChar) 0x27);
                appender.append((UChar) 0x2030);
                appender.append((UChar) 0x27);
                break;
            case 0xA4:
                appender.append((UChar) 0x27);
                appender.append((UChar) 0xA4);
                appender.append((UChar) 0x27);
                break;
            case 0x2D:
                appender.append((UChar) 0x27);
                appender.append((UChar) 0x2D);
                appender.append((UChar) 0x27);
                break;
            case 0x2B:
                appender.append((UChar) 0x27);
                appender.append((UChar) 0x2B);
                appender.append((UChar) 0x27);
                break;
            default:
                appender.append(ch);
                break;
        }
    }
}

// Appends the internal string form of this pattern to appendTo and returns
// appendTo.
//
// In this form every special token is written as an apostrophe followed by
// its symbol (a currency token repeats the currency sign once per unit of its
// length), and literal text is copied with apostrophes doubled.
UnicodeString &
AffixPattern::toString(UnicodeString &appendTo) const {
    AffixPatternIterator cursor;
    iterator(cursor);
    UnicodeStringAppender sink(appendTo);
    UnicodeString text;
    while (cursor.nextToken()) {
        const ETokenType kind = cursor.getTokenType();
        if (kind == kLiteral) {
            escapeApostropheInLiteral(cursor.getLiteral(text), sink);
            continue;
        }
        UChar symbol = 0;
        int32_t repeat = 1;
        switch (kind) {
        case kPercent:
            symbol = 0x25;
            break;
        case kPerMill:
            symbol = 0x2030;
            break;
        case kCurrency:
            symbol = 0xA4;
            repeat = cursor.getTokenLength();
            break;
        case kNegative:
            symbol = 0x2D;
            break;
        case kPositive:
            symbol = 0x2B;
            break;
        default:
            // Unknown token type: emit nothing for it.
            U_ASSERT(FALSE);
            continue;
        }
        sink.append((UChar) 0x27);
        for (int32_t n = 0; n < repeat; ++n) {
            sink.append(symbol);
        }
    }
    return appendTo;
}

// Appends the user string form of this pattern to appendTo and returns
// appendTo.
//
// In this form special tokens are written as their bare symbol (a currency
// token repeats the currency sign once per unit of its length), and literal
// text is escaped with escapeLiteral() so it cannot be mistaken for a symbol.
UnicodeString &
AffixPattern::toUserString(UnicodeString &appendTo) const {
    AffixPatternIterator cursor;
    iterator(cursor);
    UnicodeStringAppender sink(appendTo);
    UnicodeString text;
    while (cursor.nextToken()) {
        const ETokenType kind = cursor.getTokenType();
        if (kind == kLiteral) {
            escapeLiteral(cursor.getLiteral(text), sink);
        } else if (kind == kPercent) {
            sink.append((UChar) 0x25);
        } else if (kind == kPerMill) {
            sink.append((UChar) 0x2030);
        } else if (kind == kCurrency) {
            int32_t remaining = cursor.getTokenLength();
            for (; remaining > 0; --remaining) {
                sink.append((UChar) 0xA4);
            }
        } else if (kind == kNegative) {
            sink.append((UChar) 0x2D);
        } else if (kind == kPositive) {
            sink.append((UChar) 0x2B);
        } else {
            U_ASSERT(FALSE);
        }
    }
    return appendTo;
}

class AffixPatternAppender : public UMemory {
public:
    AffixPatternAppender(AffixPattern &dest) : fDest(&dest), fIdx(0) { }

    inline void append(UChar x) {
        if (fIdx == UPRV_LENGTHOF(fBuffer)) {
            fDest->addLiteral(fBuffer, 0, fIdx);
            fIdx = 0;
        }
        fBuffer[fIdx++] = x;
    }

    inline void append(UChar32 x) {
        if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) {
            fDest->addLiteral(fBuffer, 0, fIdx);
            fIdx = 0;
        }
        U16_APPEND_UNSAFE(fBuffer, fIdx, x);
    }

    inline void flush() {
        if (fIdx) {
            fDest->addLiteral(fBuffer, 0, fIdx);
        }
        fIdx = 0;
    }

    /**
     * flush the buffer when we go out of scope.
     */
    ~AffixPatternAppender() {
        flush();
    }
private:
    AffixPattern *fDest;
    int32_t fIdx;
    UChar fBuffer[32];
    AffixPatternAppender(const AffixPatternAppender &other);
    AffixPatternAppender &operator=(const AffixPatternAppender &other);
};


// Parses a user-supplied affix string and appends the result to appendTo,
// which is also returned. Does nothing if status already indicates a failure.
//
// Outside quotes, percent, per mille, minus, plus and runs of up to three
// currency signs become special tokens; everything else is literal text.
// A single apostrophe toggles quoting and is itself dropped; a doubled
// apostrophe stands for one literal apostrophe in either mode. Inside
// quotes everything is literal text.
AffixPattern &
AffixPattern::parseUserAffixString(
        const UnicodeString &affixStr,
        AffixPattern &appendTo,
        UErrorCode &status) {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    const int32_t length = affixStr.length();
    const UChar *text = affixStr.getBuffer();
    UBool quoted = FALSE;
    // Pending literal text; flushed before each special token and when this
    // function returns.
    AffixPatternAppender literalSink(appendTo);
    int32_t pos = 0;
    while (pos < length) {
        UChar unit;
        const int32_t unitCount = nextUserToken(text, pos, length, &unit);
        pos += unitCount;

        if (unit == 0x27 && unitCount == 1) {
            // Lone apostrophe: enter or leave quoted mode.
            quoted = !quoted;
            continue;
        }
        if (quoted) {
            // Only a currency-sign token can stand for more than one literal
            // UChar; a doubled apostrophe yields a single apostrophe.
            int32_t copies = (unit == 0xA4) ? unitCount : 1;
            for (; copies > 0; --copies) {
                literalSink.append(unit);
            }
            continue;
        }

        // Unquoted: kLiteral doubles as the "not a special symbol" marker.
        ETokenType kind;
        switch (unit) {
        case 0x25:
            kind = kPercent;
            break;
        case 0x2030:
            kind = kPerMill;
            break;
        case 0x2D:
            kind = kNegative;
            break;
        case 0x2B:
            kind = kPositive;
            break;
        case 0xA4:
            kind = kCurrency;
            break;
        default:
            kind = kLiteral;
            break;
        }
        if (kind == kLiteral) {
            literalSink.append(unit);
        } else {
            literalSink.flush();
            appendTo.add(kind, kind == kCurrency ? unitCount : 1);
        }
    }
    return appendTo;
}

// Parses an affix string in the internal form (the form toString() writes)
// and appends the result to appendTo, which is also returned. Does nothing
// if status already indicates a failure.
//
// In this form an apostrophe marks the next UChar as a special symbol:
// percent, per mille, minus, plus, or a run of currency signs. An apostrophe
// followed by any other UChar yields that UChar as literal text. Everything
// not preceded by an apostrophe is literal text.
AffixPattern &
AffixPattern::parseAffixString(
        const UnicodeString &affixStr,
        AffixPattern &appendTo,
        UErrorCode &status) {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    const int32_t length = affixStr.length();
    const UChar *text = affixStr.getBuffer();
    int32_t pos = 0;
    while (pos < length) {
        UChar symbol;
        int32_t width = nextToken(text, pos, length, &symbol);
        if (width == 1) {
            // Gather the whole run of plain UChars into one literal. When the
            // run stops before the end, symbol/width describe the special
            // token that ended it.
            const int32_t runBegin = pos;
            do {
                ++pos;
            } while (pos < length &&
                     (width = nextToken(text, pos, length, &symbol)) == 1);
            appendTo.addLiteral(text, runBegin, pos - runBegin);

            if (pos == length) {
                // The literal ran to the end of the string.
                return appendTo;
            }
        }
        pos += width;
        if (symbol == 0x25) {
            appendTo.add(kPercent, 1);
        } else if (symbol == 0x2030) {
            appendTo.add(kPerMill, 1);
        } else if (symbol == 0x2D) {
            appendTo.add(kNegative, 1);
        } else if (symbol == 0x2B) {
            appendTo.add(kPositive, 1);
        } else if (symbol == 0xA4) {
            // width includes the leading apostrophe.
            const int32_t signCount = width - 1;
            if (signCount > 3) {
                status = U_PARSE_ERROR;
                return appendTo;
            }
            appendTo.add(kCurrency, signCount);
        } else {
            // Apostrophe followed by an ordinary UChar: keep the UChar.
            appendTo.addLiteral(&symbol, 0, 1);
        }
    }
    return appendTo;
}

// Points result at the beginning of this pattern and returns it. The
// iterator keeps pointers to this pattern's token and literal strings.
AffixPatternIterator &
AffixPattern::iterator(AffixPatternIterator &result) const {
    result.tokens = &tokens;
    result.literals = &literals;
    result.nextTokenIndex = 0;
    result.nextLiteralIndex = 0;
    result.lastLiteralLength = 0;
    return result;
}

// Advances to the next token. Returns FALSE if there are no more tokens,
// TRUE otherwise.
//
// A literal may occupy several token units (its length is stored one byte
// per unit, lowest byte first). For a literal this call consumes all of
// them, stores the decoded length in lastLiteralLength and moves
// nextLiteralIndex past the literal's text.
UBool
AffixPatternIterator::nextToken() {
    const int32_t tokenCount = tokens->length();
    if (nextTokenIndex == tokenCount) {
        return FALSE;
    }
    const UChar *units = tokens->getBuffer();
    const int32_t head = nextTokenIndex++;
    if (UNPACK_TOKEN(units[head]) != AffixPattern::kLiteral) {
        return TRUE;
    }

    // Consume the continuation units that belong to this literal.
    while (nextTokenIndex < tokenCount && UNPACK_LONG(units[nextTokenIndex])) {
        ++nextTokenIndex;
    }

    // Decode from the highest byte (last unit) down to the first unit, which
    // is the one without the continuation bit.
    int32_t length = 0;
    int32_t k = nextTokenIndex - 1;
    while (UNPACK_LONG(units[k])) {
        length = (length << 8) | UNPACK_LENGTH(units[k]);
        --k;
    }
    length = (length << 8) | UNPACK_LENGTH(units[k]);

    lastLiteralLength = length;
    nextLiteralIndex += length;
    return TRUE;
}

// Returns the type of the token that the last nextToken() call moved onto.
AffixPattern::ETokenType
AffixPatternIterator::getTokenType() const {
    const UChar packed = tokens->charAt(nextTokenIndex - 1);
    return UNPACK_TOKEN(packed);
}

// Sets result to the text of the most recently visited literal and returns
// result. The text is the lastLiteralLength UChars that end at
// nextLiteralIndex in the pattern's literal string.
UnicodeString &
AffixPatternIterator::getLiteral(UnicodeString &result) const {
    const int32_t begin = nextLiteralIndex - lastLiteralLength;
    const UChar *text = literals->getBuffer() + begin;
    result.setTo(text, lastLiteralLength);
    return result;
}

int32_t
AffixPatternIterator::getTokenLength() const {
    const UChar *tokenBuffer = tokens->getBuffer();
    AffixPattern::ETokenType type = UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]);
    return type == AffixPattern::kLiteral ? lastLiteralLength : UNPACK_LENGTH(tokenBuffer[nextTokenIndex - 1]);
}

// Constructs a parser that uses the file-level default symbols:
// '%' (U+0025), per mille (U+2030), '-' (U+002D) and '+' (U+002B).
AffixPatternParser::AffixPatternParser()
        : fPercent(gPercent), fPermill(gPerMill), fNegative(gNegative), fPositive(gPositive) {
}

// Constructs a parser whose percent, per-mille, minus and plus strings are
// taken from symbols (see setDecimalFormatSymbols()).
AffixPatternParser::AffixPatternParser(
        const DecimalFormatSymbols &symbols) {
    setDecimalFormatSymbols(symbols);
}

void
AffixPatternParser::setDecimalFormatSymbols(
        const DecimalFormatSymbols &symbols) {
    fPercent = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol);
    fPermill = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol);
    fNegative = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
    fPositive = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
}

// Expands affixPattern into concrete text and appends it to appendTo, which
// is also returned. Does nothing if status already indicates a failure.
//
// Percent, per-mille and sign tokens are replaced by this parser's symbol
// strings and tagged with the matching number-format field. A currency token
// is replaced according to its length: 1 selects the currency symbol, 2 the
// ISO code and 3 the long name from currencyAffixInfo. Literal text is
// appended unchanged.
PluralAffix &
AffixPatternParser::parse(
        const AffixPattern &affixPattern,
        const CurrencyAffixInfo &currencyAffixInfo,
        PluralAffix &appendTo,
        UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    AffixPatternIterator cursor;
    UnicodeString text;
    for (affixPattern.iterator(cursor); cursor.nextToken(); ) {
        const AffixPattern::ETokenType kind = cursor.getTokenType();
        if (kind == AffixPattern::kLiteral) {
            appendTo.append(cursor.getLiteral(text));
        } else if (kind == AffixPattern::kPercent) {
            appendTo.append(fPercent, UNUM_PERCENT_FIELD);
        } else if (kind == AffixPattern::kPerMill) {
            appendTo.append(fPermill, UNUM_PERMILL_FIELD);
        } else if (kind == AffixPattern::kNegative) {
            appendTo.append(fNegative, UNUM_SIGN_FIELD);
        } else if (kind == AffixPattern::kPositive) {
            appendTo.append(fPositive, UNUM_SIGN_FIELD);
        } else if (kind == AffixPattern::kCurrency) {
            const int32_t signCount = cursor.getTokenLength();
            if (signCount == 1) {
                appendTo.append(
                        currencyAffixInfo.getSymbol(), UNUM_CURRENCY_FIELD);
            } else if (signCount == 2) {
                appendTo.append(
                        currencyAffixInfo.getISO(), UNUM_CURRENCY_FIELD);
            } else if (signCount == 3) {
                appendTo.append(
                        currencyAffixInfo.getLong(), UNUM_CURRENCY_FIELD, status);
            } else {
                U_ASSERT(FALSE);
            }
        } else {
            U_ASSERT(FALSE);
        }
    }
    return appendTo;
}


U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */
Commit	Line	Data
f3c0d7a5 A	1	// © 2016 and later: Unicode, Inc. and others.
f3c0d7a5 A	2	// License & terms of use: http://www.unicode.org/copyright.html
2ca993e8 A	3	/*
	4	* Copyright (C) 2015, International Business Machines
	5	* Corporation and others. All Rights Reserved.
	6	*
	7	* file name: affixpatternparser.cpp
	8	*/
	9
	10	#include "unicode/utypes.h"
	11
	12	#if !UCONFIG_NO_FORMATTING
	13
	14	#include "unicode/dcfmtsym.h"
	15	#include "unicode/plurrule.h"
f3c0d7a5	16	#include "unicode/strenum.h"
2ca993e8	17	#include "unicode/ucurr.h"
f3c0d7a5	18	#include "unicode/ustring.h"
2ca993e8 A	19	#include "affixpatternparser.h"
	20	#include "charstr.h"
	21	#include "precision.h"
	22	#include "uassert.h"
	23	#include "unistrappender.h"
	24
	25	static UChar gDefaultSymbols[] = {0xa4, 0xa4, 0xa4};
	26
	27	static UChar gPercent = 0x25;
	28	static UChar gPerMill = 0x2030;
	29	static UChar gNegative = 0x2D;
	30	static UChar gPositive = 0x2B;
	31
	32	#define PACK_TOKEN_AND_LENGTH(t, l) ((UChar) (((t) << 8) \| (l & 0xFF)))
	33
	34	#define UNPACK_TOKEN(c) ((AffixPattern::ETokenType) (((c) >> 8) & 0x7F))
	35
	36	#define UNPACK_LONG(c) (((c) >> 8) & 0x80)
	37
	38	#define UNPACK_LENGTH(c) ((c) & 0xFF)
	39
	40	U_NAMESPACE_BEGIN
	41
	42	static int32_t
	43	nextToken(const UChar buffer, int32_t idx, int32_t len, UChar token) {
	44	if (buffer[idx] != 0x27 \|\| idx + 1 == len) {
	45	*token = buffer[idx];
	46	return 1;
	47	}
	48	*token = buffer[idx + 1];
	49	if (buffer[idx + 1] == 0xA4) {
	50	int32_t i = 2;
f3c0d7a5 A	51	for (; idx + i < len && i < 4 && buffer[idx + i] == buffer[idx + 1]; ++i)
f3c0d7a5 A	52	;
2ca993e8 A	53	return i;
	54	}
	55	return 2;
	56	}
	57
	58	static int32_t
	59	nextUserToken(const UChar buffer, int32_t idx, int32_t len, UChar token) {
	60	*token = buffer[idx];
	61	int32_t max;
	62	switch (buffer[idx]) {
	63	case 0x27:
	64	max = 2;
	65	break;
	66	case 0xA4:
	67	max = 3;
	68	break;
	69	default:
	70	max = 1;
	71	break;
	72	}
	73	int32_t i = 1;
f3c0d7a5 A	74	for (; idx + i < len && i < max && buffer[idx + i] == buffer[idx]; ++i)
f3c0d7a5 A	75	;
2ca993e8 A	76	return i;
	77	}
	78
	79	CurrencyAffixInfo::CurrencyAffixInfo()
	80	: fSymbol(gDefaultSymbols, 1),
	81	fISO(gDefaultSymbols, 2),
	82	fLong(DigitAffix(gDefaultSymbols, 3)),
	83	fIsDefault(TRUE) {
	84	}
	85
	86	void
	87	CurrencyAffixInfo::set(
	88	const char *locale,
	89	const PluralRules *rules,
	90	const UChar *currency,
	91	UErrorCode &status) {
	92	if (U_FAILURE(status)) {
	93	return;
	94	}
	95	fIsDefault = FALSE;
	96	if (currency == NULL) {
	97	fSymbol.setTo(gDefaultSymbols, 1);
	98	fISO.setTo(gDefaultSymbols, 2);
	99	fLong.remove();
	100	fLong.append(gDefaultSymbols, 3);
	101	fIsDefault = TRUE;
	102	return;
	103	}
	104	int32_t len;
	105	UBool unusedIsChoice;
	106	const UChar *symbol = ucurr_getName(
	107	currency, locale, UCURR_SYMBOL_NAME, &unusedIsChoice,
	108	&len, &status);
	109	if (U_FAILURE(status)) {
	110	return;
	111	}
	112	fSymbol.setTo(symbol, len);
	113	fISO.setTo(currency, u_strlen(currency));
	114	fLong.remove();
	115	StringEnumeration* keywords = rules->getKeywords(status);
	116	if (U_FAILURE(status)) {
	117	return;
	118	}
	119	const UnicodeString* pluralCount;
	120	while ((pluralCount = keywords->snext(status)) != NULL) {
	121	CharString pCount;
	122	pCount.appendInvariantChars(*pluralCount, status);
	123	const UChar *pluralName = ucurr_getPluralName(
	124	currency, locale, &unusedIsChoice, pCount.data(),
	125	&len, &status);
	126	fLong.setVariant(pCount.data(), UnicodeString(pluralName, len), status);
	127	}
	128	delete keywords;
	129	}
	130
	131	void
	132	CurrencyAffixInfo::adjustPrecision(
	133	const UChar *currency, const UCurrencyUsage usage,
	134	FixedPrecision &precision, UErrorCode &status) {
	135	if (U_FAILURE(status)) {
	136	return;
	137	}
	138
	139	int32_t digitCount = ucurr_getDefaultFractionDigitsForUsage(
140	currency, usage, &status);
141	precision.fMin.setFracDigitCount(digitCount);
142	precision.fMax.setFracDigitCount(digitCount);
143	double increment = ucurr_getRoundingIncrementForUsage(
144	currency, usage, &status);
145	if (increment == 0.0) {
146	precision.fRoundingIncrement.clear();
147	} else {
148	precision.fRoundingIncrement.set(increment);
149	// guard against round-off error
150	precision.fRoundingIncrement.round(6);
151	}
152	}
153
154	void
155	AffixPattern::addLiteral(
156	const UChar *literal, int32_t start, int32_t len) {
157	char32Count += u_countChar32(literal + start, len);
158	literals.append(literal, start, len);
159	int32_t tlen = tokens.length();
160	// Takes 4 UChars to encode maximum literal length.
161	UChar *tokenChars = tokens.getBuffer(tlen + 4);
162
163	// find start of literal size. May be tlen if there is no literal.
164	// While finding start of literal size, compute literal length
165	int32_t literalLength = 0;
166	int32_t tLiteralStart = tlen;
167	while (tLiteralStart > 0 && UNPACK_TOKEN(tokenChars[tLiteralStart - 1]) == kLiteral) {
168	tLiteralStart--;
169	literalLength <<= 8;
170	literalLength \|= UNPACK_LENGTH(tokenChars[tLiteralStart]);
171	}
172	// Add number of chars we just added to literal
173	literalLength += len;
174
175	// Now encode the new length starting at tLiteralStart
176	tlen = tLiteralStart;
177	tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral, literalLength & 0xFF);
178	literalLength >>= 8;
179	while (literalLength) {
180	tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral \| 0x80, literalLength & 0xFF);
181	literalLength >>= 8;
182	}
183	tokens.releaseBuffer(tlen);
184	}
185
186	void
187	AffixPattern::add(ETokenType t) {
188	add(t, 1);
189	}
190
191	void
192	AffixPattern::addCurrency(uint8_t count) {
193	add(kCurrency, count);
194	}
195
196	void
197	AffixPattern::add(ETokenType t, uint8_t count) {
198	U_ASSERT(t != kLiteral);
199	char32Count += count;
200	switch (t) {
201	case kCurrency:
202	hasCurrencyToken = TRUE;
203	break;
204	case kPercent:
205	hasPercentToken = TRUE;
206	break;
207	case kPerMill:
208	hasPermillToken = TRUE;
209	break;
210	default:
211	// Do nothing
212	break;
213	}
214	tokens.append(PACK_TOKEN_AND_LENGTH(t, count));
215	}
216
217	AffixPattern &
218	AffixPattern::append(const AffixPattern &other) {
219	AffixPatternIterator iter;
220	other.iterator(iter);
221	UnicodeString literal;
222	while (iter.nextToken()) {
223	switch (iter.getTokenType()) {
224	case kLiteral:
225	iter.getLiteral(literal);
226	addLiteral(literal.getBuffer(), 0, literal.length());
227	break;
228	case kCurrency:
229	addCurrency(iter.getTokenLength());
230	break;
231	default:
232	add(iter.getTokenType());
233	break;
234	}
235	}
236	return *this;
237	}
238
239	void
240	AffixPattern::remove() {
241	tokens.remove();
242	literals.remove();
243	hasCurrencyToken = FALSE;
244	hasPercentToken = FALSE;
245	hasPermillToken = FALSE;
246	char32Count = 0;
247	}
248
249	// escapes literals for strings where special characters are NOT escaped
250	// except for apostrophe.
251	static void escapeApostropheInLiteral(
252	const UnicodeString &literal, UnicodeStringAppender &appender) {
253	int32_t len = literal.length();
254	const UChar *buffer = literal.getBuffer();
255	for (int32_t i = 0; i < len; ++i) {
256	UChar ch = buffer[i];
257	switch (ch) {
258	case 0x27:
259	appender.append((UChar) 0x27);
260	appender.append((UChar) 0x27);
261	break;
262	default:
263	appender.append(ch);
264	break;
265	}
266	}
267	}
268
269
270	// escapes literals for user strings where special characters in literals
271	// are escaped with apostrophe.
272	static void escapeLiteral(
273	const UnicodeString &literal, UnicodeStringAppender &appender) {
274	int32_t len = literal.length();
275	const UChar *buffer = literal.getBuffer();
276	for (int32_t i = 0; i < len; ++i) {
277	UChar ch = buffer[i];
278	switch (ch) {
279	case 0x27:
280	appender.append((UChar) 0x27);
281	appender.append((UChar) 0x27);
282	break;
283	case 0x25:
284	appender.append((UChar) 0x27);
285	appender.append((UChar) 0x25);
286	appender.append((UChar) 0x27);
287	break;
288	case 0x2030:
289	appender.append((UChar) 0x27);
290	appender.append((UChar) 0x2030);
291	appender.append((UChar) 0x27);
292	break;
293	case 0xA4:
294	appender.append((UChar) 0x27);
295	appender.append((UChar) 0xA4);
296	appender.append((UChar) 0x27);
297	break;
298	case 0x2D:
299	appender.append((UChar) 0x27);
300	appender.append((UChar) 0x2D);
301	appender.append((UChar) 0x27);
302	break;
303	case 0x2B:
304	appender.append((UChar) 0x27);
305	appender.append((UChar) 0x2B);
306	appender.append((UChar) 0x27);
307	break;
308	default:
309	appender.append(ch);
310	break;
311	}
312	}
313	}
314
315	UnicodeString &
316	AffixPattern::toString(UnicodeString &appendTo) const {
317	AffixPatternIterator iter;
318	iterator(iter);
319	UnicodeStringAppender appender(appendTo);
320	UnicodeString literal;
321	while (iter.nextToken()) {
322	switch (iter.getTokenType()) {
323	case kLiteral:
324	escapeApostropheInLiteral(iter.getLiteral(literal), appender);
325	break;
326	case kPercent:
327	appender.append((UChar) 0x27);
328	appender.append((UChar) 0x25);
329	break;
330	case kPerMill:
331	appender.append((UChar) 0x27);
332	appender.append((UChar) 0x2030);
333	break;
334	case kCurrency:
335	{
336	appender.append((UChar) 0x27);
337	int32_t cl = iter.getTokenLength();
338	for (int32_t i = 0; i < cl; ++i) {
339	appender.append((UChar) 0xA4);
340	}
341	}
342	break;
343	case kNegative:
344	appender.append((UChar) 0x27);
345	appender.append((UChar) 0x2D);
346	break;
347	case kPositive:
348	appender.append((UChar) 0x27);
349	appender.append((UChar) 0x2B);
350	break;
351	default:
352	U_ASSERT(FALSE);
353	break;
354	}
355	}
356	return appendTo;
357	}
358
359	UnicodeString &
360	AffixPattern::toUserString(UnicodeString &appendTo) const {
361	AffixPatternIterator iter;
362	iterator(iter);
363	UnicodeStringAppender appender(appendTo);
364	UnicodeString literal;
365	while (iter.nextToken()) {
366	switch (iter.getTokenType()) {
367	case kLiteral:
368	escapeLiteral(iter.getLiteral(literal), appender);
369	break;
370	case kPercent:
371	appender.append((UChar) 0x25);
372	break;
373	case kPerMill:
374	appender.append((UChar) 0x2030);
375	break;
376	case kCurrency:
377	{
378	int32_t cl = iter.getTokenLength();
379	for (int32_t i = 0; i < cl; ++i) {
380	appender.append((UChar) 0xA4);
381	}
382	}
383	break;
384	case kNegative:
385	appender.append((UChar) 0x2D);
386	break;
387	case kPositive:
388	appender.append((UChar) 0x2B);
389	break;
390	default:
391	U_ASSERT(FALSE);
392	break;
393	}
394	}
395	return appendTo;
396	}
397
398	class AffixPatternAppender : public UMemory {
399	public:
400	AffixPatternAppender(AffixPattern &dest) : fDest(&dest), fIdx(0) { }
401
402	inline void append(UChar x) {
403	if (fIdx == UPRV_LENGTHOF(fBuffer)) {
404	fDest->addLiteral(fBuffer, 0, fIdx);
405	fIdx = 0;
406	}
407	fBuffer[fIdx++] = x;
408	}
409
410	inline void append(UChar32 x) {
411	if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) {
412	fDest->addLiteral(fBuffer, 0, fIdx);
413	fIdx = 0;
414	}
415	U16_APPEND_UNSAFE(fBuffer, fIdx, x);
416	}
417
418	inline void flush() {
419	if (fIdx) {
420	fDest->addLiteral(fBuffer, 0, fIdx);
421	}
422	fIdx = 0;
423	}
424
425	/**
426	* flush the buffer when we go out of scope.
427	*/
428	~AffixPatternAppender() {
429	flush();
430	}
431	private:
432	AffixPattern *fDest;
433	int32_t fIdx;
434	UChar fBuffer[32];
435	AffixPatternAppender(const AffixPatternAppender &other);
436	AffixPatternAppender &operator=(const AffixPatternAppender &other);
437	};
438
439
440	AffixPattern &
441	AffixPattern::parseUserAffixString(
442	const UnicodeString &affixStr,
443	AffixPattern &appendTo,
444	UErrorCode &status) {
445	if (U_FAILURE(status)) {
446	return appendTo;
447	}
448	int32_t len = affixStr.length();
449	const UChar *buffer = affixStr.getBuffer();
450	// 0 = not quoted; 1 = quoted.
451	int32_t state = 0;
452	AffixPatternAppender appender(appendTo);
453	for (int32_t i = 0; i < len; ) {
454	UChar token;
455	int32_t tokenSize = nextUserToken(buffer, i, len, &token);
456	i += tokenSize;
457	if (token == 0x27 && tokenSize == 1) { // quote
458	state = 1 - state;
459	continue;
460	}
461	if (state == 0) {
462	switch (token) {
463	case 0x25:
464	appender.flush();
465	appendTo.add(kPercent, 1);
466	break;
467	case 0x27: // double quote
468	appender.append((UChar) 0x27);
469	break;
470	case 0x2030:
471	appender.flush();
472	appendTo.add(kPerMill, 1);
473	break;
474	case 0x2D:
475	appender.flush();
476	appendTo.add(kNegative, 1);
477	break;
478	case 0x2B:
479	appender.flush();
480	appendTo.add(kPositive, 1);
481	break;
482	case 0xA4:
483	appender.flush();
484	appendTo.add(kCurrency, tokenSize);
485	break;
486	default:
487	appender.append(token);
488	break;
489	}
490	} else {
491	switch (token) {
492	case 0x27: // double quote
493	appender.append((UChar) 0x27);
494	break;
495	case 0xA4: // included b/c tokenSize can be > 1
496	for (int32_t j = 0; j < tokenSize; ++j) {
497	appender.append((UChar) 0xA4);
498	}
499	break;
500	default:
501	appender.append(token);
502	break;
503	}
504	}
505	}
506	return appendTo;
507	}
508
509	AffixPattern &
510	AffixPattern::parseAffixString(
511	const UnicodeString &affixStr,
512	AffixPattern &appendTo,
513	UErrorCode &status) {
514	if (U_FAILURE(status)) {
515	return appendTo;
516	}
517	int32_t len = affixStr.length();
518	const UChar *buffer = affixStr.getBuffer();
519	for (int32_t i = 0; i < len; ) {
520	UChar token;
521	int32_t tokenSize = nextToken(buffer, i, len, &token);
522	if (tokenSize == 1) {
523	int32_t literalStart = i;
524	++i;
525	while (i < len && (tokenSize = nextToken(buffer, i, len, &token)) == 1) {
526	++i;
527	}
528	appendTo.addLiteral(buffer, literalStart, i - literalStart);
529
530	// If we reached end of string, we are done
531	if (i == len) {
532	return appendTo;
533	}
534	}
535	i += tokenSize;
536	switch (token) {
537	case 0x25:
538	appendTo.add(kPercent, 1);
539	break;
540	case 0x2030:
541	appendTo.add(kPerMill, 1);
542	break;
543	case 0x2D:
544	appendTo.add(kNegative, 1);
545	break;
546	case 0x2B:
547	appendTo.add(kPositive, 1);
548	break;
549	case 0xA4:
550	{
551	if (tokenSize - 1 > 3) {
552	status = U_PARSE_ERROR;
553	return appendTo;
554	}
555	appendTo.add(kCurrency, tokenSize - 1);
556	}
557	break;
558	default:
559	appendTo.addLiteral(&token, 0, 1);
560	break;
561	}
562	}
563	return appendTo;
564	}
565
566	AffixPatternIterator &
567	AffixPattern::iterator(AffixPatternIterator &result) const {
568	result.nextLiteralIndex = 0;
569	result.lastLiteralLength = 0;
570	result.nextTokenIndex = 0;
571	result.tokens = &tokens;
572	result.literals = &literals;
573	return result;
574	}
575
576	UBool
577	AffixPatternIterator::nextToken() {
578	int32_t tlen = tokens->length();
579	if (nextTokenIndex == tlen) {
580	return FALSE;
581	}
582	++nextTokenIndex;
583	const UChar *tokenBuffer = tokens->getBuffer();
584	if (UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]) ==
585	AffixPattern::kLiteral) {
586	while (nextTokenIndex < tlen &&
587	UNPACK_LONG(tokenBuffer[nextTokenIndex])) {
588	++nextTokenIndex;
589	}
590	lastLiteralLength = 0;
591	int32_t i = nextTokenIndex - 1;
592	for (; UNPACK_LONG(tokenBuffer[i]); --i) {
593	lastLiteralLength <<= 8;
594	lastLiteralLength \|= UNPACK_LENGTH(tokenBuffer[i]);
595	}
596	lastLiteralLength <<= 8;
597	lastLiteralLength \|= UNPACK_LENGTH(tokenBuffer[i]);
598	nextLiteralIndex += lastLiteralLength;
599	}
600	return TRUE;
601	}
602
603	AffixPattern::ETokenType
604	AffixPatternIterator::getTokenType() const {
605	return UNPACK_TOKEN(tokens->charAt(nextTokenIndex - 1));
606	}
607
608	UnicodeString &
609	AffixPatternIterator::getLiteral(UnicodeString &result) const {
610	const UChar *buffer = literals->getBuffer();
611	result.setTo(buffer + (nextLiteralIndex - lastLiteralLength), lastLiteralLength);
612	return result;
613	}
614
615	int32_t
616	AffixPatternIterator::getTokenLength() const {
617	const UChar *tokenBuffer = tokens->getBuffer();
618	AffixPattern::ETokenType type = UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]);
619	return type == AffixPattern::kLiteral ? lastLiteralLength : UNPACK_LENGTH(tokenBuffer[nextTokenIndex - 1]);
620	}
621
622	AffixPatternParser::AffixPatternParser()
623	: fPercent(gPercent), fPermill(gPerMill), fNegative(gNegative), fPositive(gPositive) {
624	}
625
626	AffixPatternParser::AffixPatternParser(
627	const DecimalFormatSymbols &symbols) {
628	setDecimalFormatSymbols(symbols);
629	}
630
631	void
632	AffixPatternParser::setDecimalFormatSymbols(
633	const DecimalFormatSymbols &symbols) {
634	fPercent = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol);
635	fPermill = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol);
636	fNegative = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
637	fPositive = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
638	}
639
640	PluralAffix &
641	AffixPatternParser::parse(
642	const AffixPattern &affixPattern,
643	const CurrencyAffixInfo &currencyAffixInfo,
644	PluralAffix &appendTo,
645	UErrorCode &status) const {
646	if (U_FAILURE(status)) {
647	return appendTo;
648	}
649	AffixPatternIterator iter;
650	affixPattern.iterator(iter);
651	UnicodeString literal;
652	while (iter.nextToken()) {
653	switch (iter.getTokenType()) {
654	case AffixPattern::kPercent:
655	appendTo.append(fPercent, UNUM_PERCENT_FIELD);
656	break;
657	case AffixPattern::kPerMill:
658	appendTo.append(fPermill, UNUM_PERMILL_FIELD);
659	break;
660	case AffixPattern::kNegative:
661	appendTo.append(fNegative, UNUM_SIGN_FIELD);
662	break;
663	case AffixPattern::kPositive:
664	appendTo.append(fPositive, UNUM_SIGN_FIELD);
665	break;
666	case AffixPattern::kCurrency:
667	switch (iter.getTokenLength()) {
668	case 1:
669	appendTo.append(
670	currencyAffixInfo.getSymbol(), UNUM_CURRENCY_FIELD);
671	break;
672	case 2:
673	appendTo.append(
674	currencyAffixInfo.getISO(), UNUM_CURRENCY_FIELD);
675	break;
676	case 3:
677	appendTo.append(
678	currencyAffixInfo.getLong(), UNUM_CURRENCY_FIELD, status);
679	break;
680	default:
681	U_ASSERT(FALSE);
682	break;
683	}
684	break;
685	case AffixPattern::kLiteral:
686	appendTo.append(iter.getLiteral(literal));
687	break;
688	default:
689	U_ASSERT(FALSE);
690	break;
691	}
692	}
693	return appendTo;
694	}
695
696
697	U_NAMESPACE_END
698	#endif /* #if !UCONFIG_NO_FORMATTING */