X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/57a6839dcb3bba09e8228b822b290604668416fe..d25163bfc042dbef00577180ee21dd3460fc3715:/icuSources/common/simplepatternformatter.cpp?ds=inline diff --git a/icuSources/common/simplepatternformatter.cpp b/icuSources/common/simplepatternformatter.cpp index 4c2cdac2..0cac2ec3 100644 --- a/icuSources/common/simplepatternformatter.cpp +++ b/icuSources/common/simplepatternformatter.cpp @@ -9,24 +9,40 @@ #include "cstring.h" #include "uassert.h" -#define LENGTHOF(array) (int32_t)(sizeof(array) / sizeof((array)[0])) - U_NAMESPACE_BEGIN +static UBool isInvalidArray(const void *array, int32_t size) { + return (size < 0 || (size > 0 && array == NULL)); +} + typedef enum SimplePatternFormatterCompileState { INIT, APOSTROPHE, PLACEHOLDER } SimplePatternFormatterCompileState; +// Handles parsing placeholders in the pattern string, e.g {4} or {35} class SimplePatternFormatterIdBuilder { public: SimplePatternFormatterIdBuilder() : id(0), idLen(0) { } ~SimplePatternFormatterIdBuilder() { } + + // Resets so that this object has seen no placeholder ID. void reset() { id = 0; idLen = 0; } + + // Returns the numeric placeholder ID parsed so far int32_t getId() const { return id; } + + // Appends the numeric placeholder ID parsed so far back to a + // UChar buffer. Used to recover if parser using this object finds + // no closing curly brace. void appendTo(UChar *buffer, int32_t *len) const; + + // Returns true if this object has seen a placeholder ID. UBool isValid() const { return (idLen > 0); } + + // Processes a single digit character. Pattern string parser calls this + // as it processes digits after an opening curly brace. void add(UChar ch); private: int32_t id; @@ -54,20 +70,81 @@ void SimplePatternFormatterIdBuilder::add(UChar ch) { idLen++; } +// Represents placeholder values. +class SimplePatternFormatterPlaceholderValues : public UMemory { +public: + SimplePatternFormatterPlaceholderValues( + const UnicodeString * const *values, + int32_t valuesCount); + + // Returns TRUE if appendTo value is at any index besides exceptIndex. + UBool isAppendToInAnyIndexExcept( + const UnicodeString &appendTo, int32_t exceptIndex) const; + + // For each appendTo value, stores the snapshot of it in its place. + void snapshotAppendTo(const UnicodeString &appendTo); + + // Returns the placeholder value at index. No range checking performed. + // Returned reference is valid for as long as this object exists. + const UnicodeString &get(int32_t index) const; +private: + const UnicodeString * const *fValues; + int32_t fValuesCount; + const UnicodeString *fAppendTo; + UnicodeString fAppendToCopy; + SimplePatternFormatterPlaceholderValues( + const SimplePatternFormatterPlaceholderValues &); + SimplePatternFormatterPlaceholderValues &operator=( + const SimplePatternFormatterPlaceholderValues &); +}; + +SimplePatternFormatterPlaceholderValues::SimplePatternFormatterPlaceholderValues( + const UnicodeString * const *values, + int32_t valuesCount) + : fValues(values), + fValuesCount(valuesCount), + fAppendTo(NULL), + fAppendToCopy() { +} + +UBool SimplePatternFormatterPlaceholderValues::isAppendToInAnyIndexExcept( + const UnicodeString &appendTo, int32_t exceptIndex) const { + for (int32_t i = 0; i < fValuesCount; ++i) { + if (i != exceptIndex && fValues[i] == &appendTo) { + return TRUE; + } + } + return FALSE; +} + +void SimplePatternFormatterPlaceholderValues::snapshotAppendTo( + const UnicodeString &appendTo) { + fAppendTo = &appendTo; + fAppendToCopy = appendTo; +} + +const UnicodeString &SimplePatternFormatterPlaceholderValues::get( + int32_t index) const { + if (fAppendTo == NULL || fAppendTo != fValues[index]) { + return *fValues[index]; + } + return fAppendToCopy; +} + SimplePatternFormatter::SimplePatternFormatter() : noPlaceholders(), - placeholdersByOffset(placeholderBuffer), + placeholders(), placeholderSize(0), - placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT), - placeholderCount(0) { + placeholderCount(0), + firstPlaceholderReused(FALSE) { } SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) : noPlaceholders(), - placeholdersByOffset(placeholderBuffer), + placeholders(), placeholderSize(0), - placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT), - placeholderCount(0) { + placeholderCount(0), + firstPlaceholderReused(FALSE) { UErrorCode status = U_ZERO_ERROR; compile(pattern, status); } @@ -75,15 +152,15 @@ SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) : SimplePatternFormatter::SimplePatternFormatter( const SimplePatternFormatter &other) : noPlaceholders(other.noPlaceholders), - placeholdersByOffset(placeholderBuffer), + placeholders(), placeholderSize(0), - placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT), - placeholderCount(other.placeholderCount) { + placeholderCount(other.placeholderCount), + firstPlaceholderReused(other.firstPlaceholderReused) { placeholderSize = ensureCapacity(other.placeholderSize); uprv_memcpy( - placeholdersByOffset, - other.placeholdersByOffset, - placeholderSize * 2 * sizeof(int32_t)); + placeholders.getAlias(), + other.placeholders.getAlias(), + placeholderSize * sizeof(PlaceholderInfo)); } SimplePatternFormatter &SimplePatternFormatter::operator=( @@ -92,19 +169,17 @@ SimplePatternFormatter &SimplePatternFormatter::operator=( return *this; } noPlaceholders = other.noPlaceholders; - placeholderCount = other.placeholderCount; placeholderSize = ensureCapacity(other.placeholderSize); + placeholderCount = other.placeholderCount; + firstPlaceholderReused = other.firstPlaceholderReused; uprv_memcpy( - placeholdersByOffset, - other.placeholdersByOffset, - placeholderSize * 2 * sizeof(int32_t)); + placeholders.getAlias(), + other.placeholders.getAlias(), + placeholderSize * sizeof(PlaceholderInfo)); return *this; } SimplePatternFormatter::~SimplePatternFormatter() { - if (placeholdersByOffset != placeholderBuffer) { - uprv_free(placeholdersByOffset); - } } UBool SimplePatternFormatter::compile( @@ -188,9 +263,9 @@ UnicodeString& SimplePatternFormatter::format( UnicodeString &appendTo, UErrorCode &status) const { const UnicodeString *params[] = {&arg0}; - return format( + return formatAndAppend( params, - LENGTHOF(params), + UPRV_LENGTHOF(params), appendTo, NULL, 0, @@ -203,9 +278,9 @@ UnicodeString& SimplePatternFormatter::format( UnicodeString &appendTo, UErrorCode &status) const { const UnicodeString *params[] = {&arg0, &arg1}; - return format( + return formatAndAppend( params, - LENGTHOF(params), + UPRV_LENGTHOF(params), appendTo, NULL, 0, @@ -219,9 +294,9 @@ UnicodeString& SimplePatternFormatter::format( UnicodeString &appendTo, UErrorCode &status) const { const UnicodeString *params[] = {&arg0, &arg1, &arg2}; - return format( + return formatAndAppend( params, - LENGTHOF(params), + UPRV_LENGTHOF(params), appendTo, NULL, 0, @@ -243,10 +318,14 @@ static void appendRange( int32_t start, int32_t end, UnicodeString &dest) { + // This check improves performance significantly. + if (start == end) { + return; + } dest.append(src, start, end - start); } -UnicodeString& SimplePatternFormatter::format( +UnicodeString& SimplePatternFormatter::formatAndAppend( const UnicodeString * const *placeholderValues, int32_t placeholderValueCount, UnicodeString &appendTo, @@ -256,10 +335,102 @@ UnicodeString& SimplePatternFormatter::format( if (U_FAILURE(status)) { return appendTo; } + if (isInvalidArray(placeholderValues, placeholderValueCount) + || isInvalidArray(offsetArray, offsetArrayLength)) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return appendTo; + } if (placeholderValueCount < placeholderCount) { status = U_ILLEGAL_ARGUMENT_ERROR; return appendTo; } + + // Since we are disallowing parameter values that are the same as + // appendTo, we have to check all placeholderValues as opposed to + // the first placeholderCount placeholder values. + SimplePatternFormatterPlaceholderValues values( + placeholderValues, placeholderValueCount); + if (values.isAppendToInAnyIndexExcept(appendTo, -1)) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return appendTo; + } + return formatAndAppend( + values, + appendTo, + offsetArray, + offsetArrayLength); +} + +UnicodeString& SimplePatternFormatter::formatAndReplace( + const UnicodeString * const *placeholderValues, + int32_t placeholderValueCount, + UnicodeString &result, + int32_t *offsetArray, + int32_t offsetArrayLength, + UErrorCode &status) const { + if (U_FAILURE(status)) { + return result; + } + if (isInvalidArray(placeholderValues, placeholderValueCount) + || isInvalidArray(offsetArray, offsetArrayLength)) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return result; + } + if (placeholderValueCount < placeholderCount) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return result; + } + SimplePatternFormatterPlaceholderValues values( + placeholderValues, placeholderCount); + int32_t placeholderAtStart = getUniquePlaceholderAtStart(); + + // If pattern starts with a unique placeholder and that placeholder + // value is result, we may be able to optimize by just appending to result. + if (placeholderAtStart >= 0 + && placeholderValues[placeholderAtStart] == &result) { + + // If result is the value for other placeholders, call off optimization. + if (values.isAppendToInAnyIndexExcept(result, placeholderAtStart)) { + values.snapshotAppendTo(result); + result.remove(); + return formatAndAppend( + values, + result, + offsetArray, + offsetArrayLength); + } + + // Otherwise we can optimize + formatAndAppend( + values, + result, + offsetArray, + offsetArrayLength); + + // We have to make the offset for the placeholderAtStart + // placeholder be 0. Otherwise it would be the length of the + // previous value of result. + if (offsetArrayLength > placeholderAtStart) { + offsetArray[placeholderAtStart] = 0; + } + return result; + } + if (values.isAppendToInAnyIndexExcept(result, -1)) { + values.snapshotAppendTo(result); + } + result.remove(); + return formatAndAppend( + values, + result, + offsetArray, + offsetArrayLength); +} + +UnicodeString& SimplePatternFormatter::formatAndAppend( + const SimplePatternFormatterPlaceholderValues &values, + UnicodeString &appendTo, + int32_t *offsetArray, + int32_t offsetArrayLength) const { for (int32_t i = 0; i < offsetArrayLength; ++i) { offsetArray[i] = -1; } @@ -270,72 +441,79 @@ UnicodeString& SimplePatternFormatter::format( appendRange( noPlaceholders, 0, - placeholdersByOffset[0], + placeholders[0].offset, appendTo); updatePlaceholderOffset( - placeholdersByOffset[1], + placeholders[0].id, appendTo.length(), offsetArray, offsetArrayLength); - appendTo.append(*placeholderValues[placeholdersByOffset[1]]); + const UnicodeString *placeholderValue = &values.get(placeholders[0].id); + if (placeholderValue != &appendTo) { + appendTo.append(*placeholderValue); + } for (int32_t i = 1; i < placeholderSize; ++i) { appendRange( noPlaceholders, - placeholdersByOffset[2 * i - 2], - placeholdersByOffset[2 * i], + placeholders[i - 1].offset, + placeholders[i].offset, appendTo); updatePlaceholderOffset( - placeholdersByOffset[2 * i + 1], + placeholders[i].id, appendTo.length(), offsetArray, offsetArrayLength); - appendTo.append(*placeholderValues[placeholdersByOffset[2 * i + 1]]); + placeholderValue = &values.get(placeholders[i].id); + if (placeholderValue != &appendTo) { + appendTo.append(*placeholderValue); + } } appendRange( noPlaceholders, - placeholdersByOffset[2 * placeholderSize - 2], + placeholders[placeholderSize - 1].offset, noPlaceholders.length(), appendTo); return appendTo; } -int32_t SimplePatternFormatter::ensureCapacity(int32_t atLeast) { - if (atLeast <= placeholderCapacity) { - return atLeast; +int32_t SimplePatternFormatter::getUniquePlaceholderAtStart() const { + if (placeholderSize == 0 + || firstPlaceholderReused || placeholders[0].offset != 0) { + return -1; } - // aim to double capacity each time - int32_t newCapacity = 2*atLeast - 2; + return placeholders[0].id; +} - // allocate new buffer - int32_t *newBuffer = (int32_t *) uprv_malloc(2 * newCapacity * sizeof(int32_t)); - if (newBuffer == NULL) { - return placeholderCapacity; +int32_t SimplePatternFormatter::ensureCapacity( + int32_t desiredCapacity, int32_t allocationSize) { + if (allocationSize < desiredCapacity) { + allocationSize = desiredCapacity; } - - // Copy contents of old buffer to new buffer - uprv_memcpy(newBuffer, placeholdersByOffset, 2 * placeholderSize * sizeof(int32_t)); - - // free old buffer - if (placeholdersByOffset != placeholderBuffer) { - uprv_free(placeholdersByOffset); + if (desiredCapacity <= placeholders.getCapacity()) { + return desiredCapacity; } - - // Use new buffer - placeholdersByOffset = newBuffer; - placeholderCapacity = newCapacity; - return atLeast; + // allocate new buffer + if (placeholders.resize(allocationSize, placeholderSize) == NULL) { + return placeholders.getCapacity(); + } + return desiredCapacity; } UBool SimplePatternFormatter::addPlaceholder(int32_t id, int32_t offset) { - if (ensureCapacity(placeholderSize + 1) < placeholderSize + 1) { + if (ensureCapacity(placeholderSize + 1, 2 * placeholderSize) < placeholderSize + 1) { return FALSE; } ++placeholderSize; - placeholdersByOffset[2 * placeholderSize - 2] = offset; - placeholdersByOffset[2 * placeholderSize - 1] = id; + PlaceholderInfo *placeholderEnd = &placeholders[placeholderSize - 1]; + placeholderEnd->offset = offset; + placeholderEnd->id = id; if (id >= placeholderCount) { placeholderCount = id + 1; } + if (placeholderSize > 1 + && placeholders[placeholderSize - 1].id == placeholders[0].id) { + firstPlaceholderReused = TRUE; + } return TRUE; }