]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/simplepatternformatter.cpp
ICU-551.41.tar.gz
[apple/icu.git] / icuSources / common / simplepatternformatter.cpp
index 4c2cdac28d21554fa81c6def822868f5db2bfb14..0cac2ec3fd0ac6b067faee2f6c00b8d81b86c8dc 100644 (file)
@@ -9,24 +9,40 @@
 #include "cstring.h"
 #include "uassert.h"
 
-#define LENGTHOF(array) (int32_t)(sizeof(array) / sizeof((array)[0]))
-
 U_NAMESPACE_BEGIN
 
+static UBool isInvalidArray(const void *array, int32_t size) {
+   return (size < 0 || (size > 0 && array == NULL));
+}
+
 typedef enum SimplePatternFormatterCompileState {
     INIT,
     APOSTROPHE,
     PLACEHOLDER
 } SimplePatternFormatterCompileState;
 
+// Handles parsing placeholders in the pattern string, e.g. {4} or {35}.
 class SimplePatternFormatterIdBuilder {
 public:
     SimplePatternFormatterIdBuilder() : id(0), idLen(0) { }
     ~SimplePatternFormatterIdBuilder() { }
+
+    // Resets so that this object has seen no placeholder ID.
     void reset() { id = 0; idLen = 0; }
+
+    // Returns the numeric placeholder ID parsed so far.
     int32_t getId() const { return id; }
+
+    // Appends the numeric placeholder ID parsed so far back to a
+    // UChar buffer. Used to recover if the parser using this object
+    // finds no closing curly brace.
     void appendTo(UChar *buffer, int32_t *len) const;
+
+    // Returns true if this object has seen a placeholder ID.
     UBool isValid() const { return (idLen > 0); }
+
+    // Processes a single digit character. The pattern string parser calls
+    // this as it processes digits after an opening curly brace.
     void add(UChar ch);
 private:
     int32_t id;
@@ -54,20 +70,81 @@ void SimplePatternFormatterIdBuilder::add(UChar ch) {
     idLen++;
 }
 
+// Represents placeholder values.
+class SimplePatternFormatterPlaceholderValues : public UMemory {
+public:
+    SimplePatternFormatterPlaceholderValues(
+            const UnicodeString * const *values,
+            int32_t valuesCount);
+
+    // Returns TRUE if appendTo itself (same address) is the value at any index besides exceptIndex.
+    UBool isAppendToInAnyIndexExcept(
+            const UnicodeString &appendTo, int32_t exceptIndex) const;
+
+    // Copies appendTo so that get() returns the copy wherever appendTo itself is the value.
+    void snapshotAppendTo(const UnicodeString &appendTo);
+
+    // Returns the placeholder value at index. No range checking performed.
+    // Returned reference is valid for as long as this object exists.
+    const UnicodeString &get(int32_t index) const;
+private:
+    const UnicodeString * const *fValues;
+    int32_t fValuesCount;
+    const UnicodeString *fAppendTo;
+    UnicodeString fAppendToCopy;
+    SimplePatternFormatterPlaceholderValues(
+            const SimplePatternFormatterPlaceholderValues &);
+    SimplePatternFormatterPlaceholderValues &operator=(
+            const SimplePatternFormatterPlaceholderValues &);
+};
+
+SimplePatternFormatterPlaceholderValues::SimplePatternFormatterPlaceholderValues(
+        const UnicodeString * const *values,
+        int32_t valuesCount) 
+        : fValues(values),
+          fValuesCount(valuesCount),
+          fAppendTo(NULL),
+          fAppendToCopy() {
+}
+
+UBool SimplePatternFormatterPlaceholderValues::isAppendToInAnyIndexExcept(
+        const UnicodeString &appendTo, int32_t exceptIndex) const {
+    for (int32_t i = 0; i < fValuesCount; ++i) {
+        if (i != exceptIndex && fValues[i] == &appendTo) {
+            return TRUE;
+        }
+    }
+    return FALSE;
+}
+
+void SimplePatternFormatterPlaceholderValues::snapshotAppendTo(
+        const UnicodeString &appendTo) {
+    fAppendTo = &appendTo;
+    fAppendToCopy = appendTo;
+}
+
+const UnicodeString &SimplePatternFormatterPlaceholderValues::get(
+        int32_t index) const {
+    if (fAppendTo == NULL || fAppendTo != fValues[index]) {
+        return *fValues[index];
+    }
+    return fAppendToCopy;
+}
+
 SimplePatternFormatter::SimplePatternFormatter() :
         noPlaceholders(),
-        placeholdersByOffset(placeholderBuffer),
+        placeholders(),
         placeholderSize(0),
-        placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
-        placeholderCount(0) {
+        placeholderCount(0),
+        firstPlaceholderReused(FALSE) {
 }
 
 SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) :
         noPlaceholders(),
-        placeholdersByOffset(placeholderBuffer),
+        placeholders(),
         placeholderSize(0),
-        placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
-        placeholderCount(0) {
+        placeholderCount(0),
+        firstPlaceholderReused(FALSE) {
     UErrorCode status = U_ZERO_ERROR;
     compile(pattern, status);
 }
@@ -75,15 +152,15 @@ SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) :
 SimplePatternFormatter::SimplePatternFormatter(
         const SimplePatternFormatter &other) :
         noPlaceholders(other.noPlaceholders),
-        placeholdersByOffset(placeholderBuffer),
+        placeholders(),
         placeholderSize(0),
-        placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
-        placeholderCount(other.placeholderCount) {
+        placeholderCount(other.placeholderCount),
+        firstPlaceholderReused(other.firstPlaceholderReused) {
     placeholderSize = ensureCapacity(other.placeholderSize);
     uprv_memcpy(
-            placeholdersByOffset,
-            other.placeholdersByOffset,
-            placeholderSize * 2 * sizeof(int32_t));
+            placeholders.getAlias(),
+            other.placeholders.getAlias(),
+            placeholderSize * sizeof(PlaceholderInfo));
 }
 
 SimplePatternFormatter &SimplePatternFormatter::operator=(
@@ -92,19 +169,17 @@ SimplePatternFormatter &SimplePatternFormatter::operator=(
         return *this;
     }
     noPlaceholders = other.noPlaceholders;
-    placeholderCount = other.placeholderCount;
     placeholderSize = ensureCapacity(other.placeholderSize);
+    placeholderCount = other.placeholderCount;
+    firstPlaceholderReused = other.firstPlaceholderReused;
     uprv_memcpy(
-            placeholdersByOffset,
-            other.placeholdersByOffset,
-            placeholderSize * 2 * sizeof(int32_t));
+            placeholders.getAlias(),
+            other.placeholders.getAlias(),
+            placeholderSize * sizeof(PlaceholderInfo));
     return *this;
 }
 
 SimplePatternFormatter::~SimplePatternFormatter() {
-    if (placeholdersByOffset != placeholderBuffer) {
-        uprv_free(placeholdersByOffset);
-    }
 }
 
 UBool SimplePatternFormatter::compile(
@@ -188,9 +263,9 @@ UnicodeString& SimplePatternFormatter::format(
         UnicodeString &appendTo,
         UErrorCode &status) const {
     const UnicodeString *params[] = {&arg0};
-    return format(
+    return formatAndAppend(
             params,
-            LENGTHOF(params),
+            UPRV_LENGTHOF(params),
             appendTo,
             NULL,
             0,
@@ -203,9 +278,9 @@ UnicodeString& SimplePatternFormatter::format(
         UnicodeString &appendTo,
         UErrorCode &status) const {
     const UnicodeString *params[] = {&arg0, &arg1};
-    return format(
+    return formatAndAppend(
             params,
-            LENGTHOF(params),
+            UPRV_LENGTHOF(params),
             appendTo,
             NULL,
             0,
@@ -219,9 +294,9 @@ UnicodeString& SimplePatternFormatter::format(
         UnicodeString &appendTo,
         UErrorCode &status) const {
     const UnicodeString *params[] = {&arg0, &arg1, &arg2};
-    return format(
+    return formatAndAppend(
             params,
-            LENGTHOF(params),
+            UPRV_LENGTHOF(params),
             appendTo,
             NULL,
             0,
@@ -243,10 +318,14 @@ static void appendRange(
         int32_t start,
         int32_t end,
         UnicodeString &dest) {
+    // This check improves performance significantly.
+    if (start == end) {
+        return;
+    }
     dest.append(src, start, end - start);
 }
 
-UnicodeString& SimplePatternFormatter::format(
+UnicodeString& SimplePatternFormatter::formatAndAppend(
         const UnicodeString * const *placeholderValues,
         int32_t placeholderValueCount,
         UnicodeString &appendTo,
@@ -256,10 +335,102 @@ UnicodeString& SimplePatternFormatter::format(
     if (U_FAILURE(status)) {
         return appendTo;
     }
+    if (isInvalidArray(placeholderValues, placeholderValueCount)
+            || isInvalidArray(offsetArray, offsetArrayLength)) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return appendTo;
+    }
     if (placeholderValueCount < placeholderCount) {
         status = U_ILLEGAL_ARGUMENT_ERROR;
         return appendTo;
     }
+    
+    // Since we are disallowing parameter values that are the same as
+    // appendTo, we have to check all placeholderValues as opposed to
+    // the first placeholderCount placeholder values.
+    SimplePatternFormatterPlaceholderValues values(
+            placeholderValues, placeholderValueCount);
+    if (values.isAppendToInAnyIndexExcept(appendTo, -1)) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return appendTo;
+    }
+    return formatAndAppend(
+            values,
+            appendTo,
+            offsetArray,
+            offsetArrayLength);
+}
+
+UnicodeString& SimplePatternFormatter::formatAndReplace(
+        const UnicodeString * const *placeholderValues,
+        int32_t placeholderValueCount,
+        UnicodeString &result,
+        int32_t *offsetArray,
+        int32_t offsetArrayLength,
+        UErrorCode &status) const {
+    if (U_FAILURE(status)) {
+        return result;
+    }
+    if (isInvalidArray(placeholderValues, placeholderValueCount)
+            || isInvalidArray(offsetArray, offsetArrayLength)) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return result;
+    }
+    if (placeholderValueCount < placeholderCount) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return result;
+    }
+    SimplePatternFormatterPlaceholderValues values(
+            placeholderValues, placeholderCount);
+    int32_t placeholderAtStart = getUniquePlaceholderAtStart();
+
+    // If pattern starts with a unique placeholder and that placeholder
+    // value is result, we may be able to optimize by just appending to result.
+    if (placeholderAtStart >= 0
+            && placeholderValues[placeholderAtStart] == &result) {
+
+        // If result is the value for other placeholders, call off optimization.
+        if (values.isAppendToInAnyIndexExcept(result, placeholderAtStart)) {
+            values.snapshotAppendTo(result);
+            result.remove();
+            return formatAndAppend(
+                    values,
+                    result,
+                    offsetArray,
+                    offsetArrayLength);
+        }
+
+        // Otherwise we can optimize by appending to result in place.
+        formatAndAppend(
+                values,
+                result,
+                offsetArray,
+                offsetArrayLength);
+        
+        // We have to make the offset for the placeholderAtStart
+        // placeholder be 0. Otherwise it would be the length of the
+        // previous value of result.
+        if (offsetArrayLength > placeholderAtStart) {
+            offsetArray[placeholderAtStart] = 0;
+        }
+        return result;
+    }
+    if (values.isAppendToInAnyIndexExcept(result, -1)) {
+        values.snapshotAppendTo(result);
+    }
+    result.remove();
+    return formatAndAppend(
+            values,
+            result,
+            offsetArray,
+            offsetArrayLength);
+}
+
+UnicodeString& SimplePatternFormatter::formatAndAppend(
+        const SimplePatternFormatterPlaceholderValues &values,
+        UnicodeString &appendTo,
+        int32_t *offsetArray,
+        int32_t offsetArrayLength) const {
     for (int32_t i = 0; i < offsetArrayLength; ++i) {
         offsetArray[i] = -1;
     }
@@ -270,72 +441,79 @@ UnicodeString& SimplePatternFormatter::format(
     appendRange(
             noPlaceholders,
             0,
-            placeholdersByOffset[0],
+            placeholders[0].offset,
             appendTo);
     updatePlaceholderOffset(
-            placeholdersByOffset[1],
+            placeholders[0].id,
             appendTo.length(),
             offsetArray,
             offsetArrayLength);
-    appendTo.append(*placeholderValues[placeholdersByOffset[1]]);
+    const UnicodeString *placeholderValue = &values.get(placeholders[0].id);
+    if (placeholderValue != &appendTo) {
+        appendTo.append(*placeholderValue);
+    }
     for (int32_t i = 1; i < placeholderSize; ++i) {
         appendRange(
                 noPlaceholders,
-                placeholdersByOffset[2 * i - 2],
-                placeholdersByOffset[2 * i],
+                placeholders[i - 1].offset,
+                placeholders[i].offset,
                 appendTo);
         updatePlaceholderOffset(
-                placeholdersByOffset[2 * i + 1],
+                placeholders[i].id,
                 appendTo.length(),
                 offsetArray,
                 offsetArrayLength);
-        appendTo.append(*placeholderValues[placeholdersByOffset[2 * i + 1]]);
+        placeholderValue = &values.get(placeholders[i].id);
+        if (placeholderValue != &appendTo) {
+            appendTo.append(*placeholderValue);
+        }
     }
     appendRange(
             noPlaceholders,
-            placeholdersByOffset[2 * placeholderSize - 2],
+            placeholders[placeholderSize - 1].offset,
             noPlaceholders.length(),
             appendTo);
     return appendTo;
 }
 
-int32_t SimplePatternFormatter::ensureCapacity(int32_t atLeast) {
-    if (atLeast <= placeholderCapacity) {
-        return atLeast;
+int32_t SimplePatternFormatter::getUniquePlaceholderAtStart() const {
+    if (placeholderSize == 0
+            || firstPlaceholderReused || placeholders[0].offset != 0) {
+        return -1;
     }
-    // aim to double capacity each time
-    int32_t newCapacity = 2*atLeast - 2;
+    return placeholders[0].id;
+}
 
-    // allocate new buffer
-    int32_t *newBuffer = (int32_t *) uprv_malloc(2 * newCapacity * sizeof(int32_t));
-    if (newBuffer == NULL) {
-        return placeholderCapacity;
+int32_t SimplePatternFormatter::ensureCapacity(
+        int32_t desiredCapacity, int32_t allocationSize) {
+    if (allocationSize < desiredCapacity) {
+        allocationSize = desiredCapacity;
     }
-
-    // Copy contents of old buffer to new buffer
-    uprv_memcpy(newBuffer, placeholdersByOffset, 2 * placeholderSize * sizeof(int32_t));
-
-    // free old buffer
-    if (placeholdersByOffset != placeholderBuffer) {
-        uprv_free(placeholdersByOffset);
+    if (desiredCapacity <= placeholders.getCapacity()) {
+        return desiredCapacity;
     }
-
-    // Use new buffer
-    placeholdersByOffset = newBuffer;
-    placeholderCapacity = newCapacity;
-    return atLeast;
+    // allocate new buffer
+    if (placeholders.resize(allocationSize, placeholderSize) == NULL) {
+        return placeholders.getCapacity();
+    }
+    return desiredCapacity;
 }
 
 UBool SimplePatternFormatter::addPlaceholder(int32_t id, int32_t offset) {
-    if (ensureCapacity(placeholderSize + 1) < placeholderSize + 1) {
+    if (ensureCapacity(placeholderSize + 1, 2 * placeholderSize) < placeholderSize + 1) {
         return FALSE;
     }
     ++placeholderSize;
-    placeholdersByOffset[2 * placeholderSize - 2] = offset;
-    placeholdersByOffset[2 * placeholderSize - 1] = id;
+    PlaceholderInfo *placeholderEnd = &placeholders[placeholderSize - 1];
+    placeholderEnd->offset = offset;
+    placeholderEnd->id = id;
     if (id >= placeholderCount) {
         placeholderCount = id + 1;
     }
+    if (placeholderSize > 1
+            && placeholders[placeholderSize - 1].id == placeholders[0].id) {
+        firstPlaceholderReused = TRUE;
+    }
     return TRUE;
 }