#include "cstring.h"
#include "uassert.h"
-#define LENGTHOF(array) (int32_t)(sizeof(array) / sizeof((array)[0]))
-
U_NAMESPACE_BEGIN
+static UBool isInvalidArray(const void *array, int32_t size) {  // Reports whether an (array, size) argument pair is unusable.
+ return (size < 0 || (size > 0 && array == NULL));  // Invalid: negative size, or NULL array with a positive size; NULL with size 0 is accepted.
+}
+
typedef enum SimplePatternFormatterCompileState {
INIT,
APOSTROPHE,
PLACEHOLDER
} SimplePatternFormatterCompileState;
+// Handles parsing placeholders in the pattern string, e.g., {4} or {35}.
class SimplePatternFormatterIdBuilder {
public:
SimplePatternFormatterIdBuilder() : id(0), idLen(0) { }
~SimplePatternFormatterIdBuilder() { }
+
+ // Resets so that this object has seen no placeholder ID.
void reset() { id = 0; idLen = 0; }
+
+ // Returns the numeric placeholder ID parsed so far
int32_t getId() const { return id; }
+
+ // Appends the numeric placeholder ID parsed so far back to a
+ // UChar buffer. Used to recover if the parser using this object finds
+ // no closing curly brace.
void appendTo(UChar *buffer, int32_t *len) const;
+
+ // Returns true if this object has seen a placeholder ID.
UBool isValid() const { return (idLen > 0); }
+
+ // Processes a single digit character. The pattern string parser calls this
+ // as it processes digits after an opening curly brace.
void add(UChar ch);
private:
int32_t id;
idLen++;
}
+// Represents placeholder values: wraps a caller-supplied array of UnicodeString pointers (the pointer is stored, the array is not copied) plus an optional snapshot of the appendTo string.
+class SimplePatternFormatterPlaceholderValues : public UMemory {
+public:
+ SimplePatternFormatterPlaceholderValues(
+ const UnicodeString * const *values,
+ int32_t valuesCount);
+
+ // Returns TRUE if the address of appendTo is stored at any index other than exceptIndex (entries are compared by pointer, not by content).
+ UBool isAppendToInAnyIndexExcept(
+ const UnicodeString &appendTo, int32_t exceptIndex) const;
+
+ // Records the address of appendTo and copies its current contents, so that get() returns the copy for any entry that points at appendTo.
+ void snapshotAppendTo(const UnicodeString &appendTo);
+
+ // Returns the placeholder value at index. No range checking performed.
+ // Returned reference is valid for as long as this object exists.
+ const UnicodeString &get(int32_t index) const;
+private:
+ const UnicodeString * const *fValues;  // Caller-supplied array of value pointers.
+ int32_t fValuesCount;  // Number of entries scanned by isAppendToInAnyIndexExcept().
+ const UnicodeString *fAppendTo;  // Address recorded by snapshotAppendTo(); NULL until it is called.
+ UnicodeString fAppendToCopy;  // Contents of appendTo at the time of the snapshot.
+ SimplePatternFormatterPlaceholderValues(  // Copy constructor is declared private; no definition is visible in this excerpt.
+ const SimplePatternFormatterPlaceholderValues &);
+ SimplePatternFormatterPlaceholderValues &operator=(  // Copy assignment is likewise declared private.
+ const SimplePatternFormatterPlaceholderValues &);
+};
+
+SimplePatternFormatterPlaceholderValues::SimplePatternFormatterPlaceholderValues(  // Stores the values pointer and count; no snapshot exists yet.
+ const UnicodeString * const *values,
+ int32_t valuesCount)
+ : fValues(values),
+ fValuesCount(valuesCount),
+ fAppendTo(NULL),  // NULL makes get() always return the entry from fValues.
+ fAppendToCopy() {
+}
+
+UBool SimplePatternFormatterPlaceholderValues::isAppendToInAnyIndexExcept(  // Scans the first fValuesCount entries for the address of appendTo.
+ const UnicodeString &appendTo, int32_t exceptIndex) const {  // An exceptIndex of -1 excludes nothing, because i starts at 0.
+ for (int32_t i = 0; i < fValuesCount; ++i) {
+ if (i != exceptIndex && fValues[i] == &appendTo) {  // Identity (pointer) comparison, not string equality.
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+void SimplePatternFormatterPlaceholderValues::snapshotAppendTo(  // Remembers appendTo's address and copies its current contents.
+ const UnicodeString &appendTo) {
+ fAppendTo = &appendTo;  // get() compares array entries against this address.
+ fAppendToCopy = appendTo;  // Copy is taken now; later changes to appendTo do not affect it.
+}
+
+const UnicodeString &SimplePatternFormatterPlaceholderValues::get(  // Returns the value at index, or the snapshot copy when that entry is the snapshotted appendTo.
+ int32_t index) const {  // index is not range-checked.
+ if (fAppendTo == NULL || fAppendTo != fValues[index]) {  // No snapshot taken, or this entry points at a different string.
+ return *fValues[index];
+ }
+ return fAppendToCopy;  // Entry is the snapshotted string: hand back the saved contents instead.
+}
+
SimplePatternFormatter::SimplePatternFormatter() :
noPlaceholders(),
- placeholdersByOffset(placeholderBuffer),
+ placeholders(),  // Default-constructed placeholder storage; takes the place of the placeholdersByOffset pointer.
placeholderSize(0),
- placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
- placeholderCount(0) {
+ placeholderCount(0),
+ firstPlaceholderReused(FALSE) {  // Starts FALSE; addPlaceholder() sets it when a later placeholder repeats the first one's id.
}
SimplePatternFormatter::SimplePatternFormatter(const UnicodeString &pattern) :
noPlaceholders(),
- placeholdersByOffset(placeholderBuffer),
+ placeholders(),  // Default-constructed placeholder storage; takes the place of the placeholdersByOffset pointer.
placeholderSize(0),
- placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
- placeholderCount(0) {
+ placeholderCount(0),
+ firstPlaceholderReused(FALSE) {  // The body compiles pattern using a local UErrorCode that is not reported back to the caller.
UErrorCode status = U_ZERO_ERROR;
compile(pattern, status);
}
SimplePatternFormatter::SimplePatternFormatter(
const SimplePatternFormatter &other) :
noPlaceholders(other.noPlaceholders),
- placeholdersByOffset(placeholderBuffer),
+ placeholders(),  // Starts empty; sized by the ensureCapacity() call in the body.
placeholderSize(0),
- placeholderCapacity(EXPECTED_PLACEHOLDER_COUNT),
- placeholderCount(other.placeholderCount) {
+ placeholderCount(other.placeholderCount),
+ firstPlaceholderReused(other.firstPlaceholderReused) {  // NOTE(review): ensureCapacity() is called below with one argument; presumably allocationSize has a default in the header — confirm.
placeholderSize = ensureCapacity(other.placeholderSize);
uprv_memcpy(
- placeholdersByOffset,
- other.placeholdersByOffset,
- placeholderSize * 2 * sizeof(int32_t));
+ placeholders.getAlias(),
+ other.placeholders.getAlias(),
+ placeholderSize * sizeof(PlaceholderInfo));  // Copies placeholderSize entries, i.e. whatever ensureCapacity() returned.
}
SimplePatternFormatter &SimplePatternFormatter::operator=(
return *this;
}
noPlaceholders = other.noPlaceholders;
- placeholderCount = other.placeholderCount;
placeholderSize = ensureCapacity(other.placeholderSize);
+ placeholderCount = other.placeholderCount;
+ firstPlaceholderReused = other.firstPlaceholderReused;
uprv_memcpy(
- placeholdersByOffset,
- other.placeholdersByOffset,
- placeholderSize * 2 * sizeof(int32_t));
+ placeholders.getAlias(),
+ other.placeholders.getAlias(),
+ placeholderSize * sizeof(PlaceholderInfo));
return *this;
}
SimplePatternFormatter::~SimplePatternFormatter() {
- if (placeholdersByOffset != placeholderBuffer) {
- uprv_free(placeholdersByOffset);
- }
}
UBool SimplePatternFormatter::compile(
UnicodeString &appendTo,
UErrorCode &status) const {
const UnicodeString *params[] = {&arg0};
- return format(
+ return formatAndAppend(
params,
- LENGTHOF(params),
+ UPRV_LENGTHOF(params),
appendTo,
NULL,
0,
UnicodeString &appendTo,
UErrorCode &status) const {
const UnicodeString *params[] = {&arg0, &arg1};
- return format(
+ return formatAndAppend(
params,
- LENGTHOF(params),
+ UPRV_LENGTHOF(params),
appendTo,
NULL,
0,
UnicodeString &appendTo,
UErrorCode &status) const {
const UnicodeString *params[] = {&arg0, &arg1, &arg2};
- return format(
+ return formatAndAppend(
params,
- LENGTHOF(params),
+ UPRV_LENGTHOF(params),
appendTo,
NULL,
0,
int32_t start,
int32_t end,
UnicodeString &dest) {
+ // This check improves performance significantly.
+ if (start == end) {
+ return;
+ }
dest.append(src, start, end - start);
}
-UnicodeString& SimplePatternFormatter::format(
+UnicodeString& SimplePatternFormatter::formatAndAppend(
const UnicodeString * const *placeholderValues,
int32_t placeholderValueCount,
UnicodeString &appendTo,
if (U_FAILURE(status)) {
return appendTo;
}
+ if (isInvalidArray(placeholderValues, placeholderValueCount)
+ || isInvalidArray(offsetArray, offsetArrayLength)) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return appendTo;
+ }
if (placeholderValueCount < placeholderCount) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return appendTo;
}
+
+ // Since we are disallowing parameter values that are the same as
+ // appendTo, we have to check all placeholderValues as opposed to
+ // the first placeholderCount placeholder values.
+ SimplePatternFormatterPlaceholderValues values(
+ placeholderValues, placeholderValueCount);
+ if (values.isAppendToInAnyIndexExcept(appendTo, -1)) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return appendTo;
+ }
+ return formatAndAppend(
+ values,
+ appendTo,
+ offsetArray,
+ offsetArrayLength);
+}
+
+UnicodeString& SimplePatternFormatter::formatAndReplace(  // Formats into result, replacing its previous contents; result may itself be one of the placeholder values.
+ const UnicodeString * const *placeholderValues,
+ int32_t placeholderValueCount,
+ UnicodeString &result,
+ int32_t *offsetArray,
+ int32_t offsetArrayLength,
+ UErrorCode &status) const {
+ if (U_FAILURE(status)) {  // Does nothing when status already indicates failure.
+ return result;
+ }
+ if (isInvalidArray(placeholderValues, placeholderValueCount)  // Negative count, or NULL array with positive count, is rejected.
+ || isInvalidArray(offsetArray, offsetArrayLength)) {
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return result;
+ }
+ if (placeholderValueCount < placeholderCount) {  // Too few values for the placeholders this pattern uses.
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+ return result;
+ }
+ SimplePatternFormatterPlaceholderValues values(
+ placeholderValues, placeholderCount);  // Tracks only the first placeholderCount values, not placeholderValueCount.
+ int32_t placeholderAtStart = getUniquePlaceholderAtStart();  // -1 unless the pattern begins with a placeholder whose id is not reused.
+
+ // If pattern starts with a unique placeholder and that placeholder
+ // value is result, we may be able to optimize by just appending to result.
+ if (placeholderAtStart >= 0
+ && placeholderValues[placeholderAtStart] == &result) {
+
+ // If result is the value for other placeholders, call off optimization.
+ if (values.isAppendToInAnyIndexExcept(result, placeholderAtStart)) {
+ values.snapshotAppendTo(result);  // Save result's contents before it is emptied.
+ result.remove();  // Empties result so the formatted text replaces it.
+ return formatAndAppend(
+ values,
+ result,
+ offsetArray,
+ offsetArrayLength);
+ }
+
+ // Otherwise we can optimize
+ formatAndAppend(  // result keeps its text; the private formatAndAppend() skips appending a value that is appendTo itself.
+ values,
+ result,
+ offsetArray,
+ offsetArrayLength);
+
+ // We have to make the offset for the placeholderAtStart
+ // placeholder be 0. Otherwise it would be the length of the
+ // previous value of result.
+ if (offsetArrayLength > placeholderAtStart) {
+ offsetArray[placeholderAtStart] = 0;
+ }
+ return result;
+ }
+ if (values.isAppendToInAnyIndexExcept(result, -1)) {  // -1 excludes no index: is result used as any tracked value?
+ values.snapshotAppendTo(result);  // Save result's contents before it is emptied below.
+ }
+ result.remove();
+ return formatAndAppend(
+ values,
+ result,
+ offsetArray,
+ offsetArrayLength);
+}
+
+UnicodeString& SimplePatternFormatter::formatAndAppend(  // Worker overload: appends the formatted pattern to appendTo; performs no argument validation itself.
+ const SimplePatternFormatterPlaceholderValues &values,
+ UnicodeString &appendTo,
+ int32_t *offsetArray,
+ int32_t offsetArrayLength) const {
for (int32_t i = 0; i < offsetArrayLength; ++i) {
offsetArray[i] = -1;
}
appendRange(
noPlaceholders,
0,
- placeholdersByOffset[0],
+ placeholders[0].offset,  // NOTE(review): placeholders[0] is read with no placeholderSize == 0 guard visible in this excerpt — confirm against the full file.
appendTo);
updatePlaceholderOffset(
- placeholdersByOffset[1],
+ placeholders[0].id,
appendTo.length(),
offsetArray,
offsetArrayLength);
- appendTo.append(*placeholderValues[placeholdersByOffset[1]]);
+ const UnicodeString *placeholderValue = &values.get(placeholders[0].id);
+ if (placeholderValue != &appendTo) {  // A value that is appendTo itself is not appended again.
+ appendTo.append(*placeholderValue);
+ }
for (int32_t i = 1; i < placeholderSize; ++i) {
appendRange(
noPlaceholders,
- placeholdersByOffset[2 * i - 2],
- placeholdersByOffset[2 * i],
+ placeholders[i - 1].offset,  // Literal text lying between the previous placeholder and this one.
+ placeholders[i].offset,
appendTo);
updatePlaceholderOffset(
- placeholdersByOffset[2 * i + 1],
+ placeholders[i].id,
appendTo.length(),
offsetArray,
offsetArrayLength);
- appendTo.append(*placeholderValues[placeholdersByOffset[2 * i + 1]]);
+ placeholderValue = &values.get(placeholders[i].id);  // get() substitutes the snapshot copy when one was taken for this entry.
+ if (placeholderValue != &appendTo) {  // Same self-append guard as for the first placeholder.
+ appendTo.append(*placeholderValue);
+ }
}
appendRange(
noPlaceholders,
- placeholdersByOffset[2 * placeholderSize - 2],
+ placeholders[placeholderSize - 1].offset,  // Remaining literal text after the last placeholder.
noPlaceholders.length(),
appendTo);
return appendTo;
}
-int32_t SimplePatternFormatter::ensureCapacity(int32_t atLeast) {
- if (atLeast <= placeholderCapacity) {
- return atLeast;
+int32_t SimplePatternFormatter::getUniquePlaceholderAtStart() const {  // Returns the id of the placeholder at pattern offset 0 when that id is not reused; otherwise -1.
+ if (placeholderSize == 0
+ || firstPlaceholderReused || placeholders[0].offset != 0) {  // No placeholders, first id reused later, or pattern does not begin with a placeholder.
+ return -1;
}
- // aim to double capacity each time
- int32_t newCapacity = 2*atLeast - 2;
+ return placeholders[0].id;
+}
- // allocate new buffer
- int32_t *newBuffer = (int32_t *) uprv_malloc(2 * newCapacity * sizeof(int32_t));
- if (newBuffer == NULL) {
- return placeholderCapacity;
+int32_t SimplePatternFormatter::ensureCapacity(  // Makes room for desiredCapacity entries; returns desiredCapacity on success, else the capacity still available.
+ int32_t desiredCapacity, int32_t allocationSize) {
+ if (allocationSize < desiredCapacity) {  // Never allocate less than what was asked for.
+ allocationSize = desiredCapacity;
}
-
- // Copy contents of old buffer to new buffer
- uprv_memcpy(newBuffer, placeholdersByOffset, 2 * placeholderSize * sizeof(int32_t));
-
- // free old buffer
- if (placeholdersByOffset != placeholderBuffer) {
- uprv_free(placeholdersByOffset);
+ if (desiredCapacity <= placeholders.getCapacity()) {  // Existing storage is already large enough; no allocation needed.
+ return desiredCapacity;
}
-
- // Use new buffer
- placeholdersByOffset = newBuffer;
- placeholderCapacity = newCapacity;
- return atLeast;
+ // allocate new buffer
+ if (placeholders.resize(allocationSize, placeholderSize) == NULL) {  // presumably keeps the first placeholderSize entries (MaybeStackArray-style resize) — confirm.
+ return placeholders.getCapacity();  // Resize failed: report the capacity that is still available.
+ }
+ return desiredCapacity;
}
UBool SimplePatternFormatter::addPlaceholder(int32_t id, int32_t offset) {
- if (ensureCapacity(placeholderSize + 1) < placeholderSize + 1) {
+ if (ensureCapacity(placeholderSize + 1, 2 * placeholderSize) < placeholderSize + 1) {  // Needs one more slot; requests twice the current size if a reallocation happens.
return FALSE;
}
++placeholderSize;
- placeholdersByOffset[2 * placeholderSize - 2] = offset;
- placeholdersByOffset[2 * placeholderSize - 1] = id;
+ PlaceholderInfo *placeholderEnd = &placeholders[placeholderSize - 1];  // The newly claimed last entry.
+ placeholderEnd->offset = offset;
+ placeholderEnd->id = id;
if (id >= placeholderCount) {
placeholderCount = id + 1;
}
+ if (placeholderSize > 1
+ && placeholders[placeholderSize - 1].id == placeholders[0].id) {  // A later placeholder repeats the first placeholder's id.
+ firstPlaceholderReused = TRUE;  // Consulted by getUniquePlaceholderAtStart().
+ }
return TRUE;
}