X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/08b89b0a244153b9f5bbb2f49c55ab0f7298122e..b331163bffd790ced0e88b73f44f86d49ccc48a5:/icuSources/i18n/collationsettings.cpp diff --git a/icuSources/i18n/collationsettings.cpp b/icuSources/i18n/collationsettings.cpp index b63d6bf3..e3e76a47 100644 --- a/icuSources/i18n/collationsettings.cpp +++ b/icuSources/i18n/collationsettings.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2013-2014, International Business Machines +* Copyright (C) 2013-2015, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * collationsettings.cpp @@ -16,10 +16,12 @@ #include "unicode/ucol.h" #include "cmemory.h" #include "collation.h" +#include "collationdata.h" #include "collationsettings.h" #include "sharedobject.h" #include "uassert.h" #include "umutex.h" +#include "uvectr32.h" U_NAMESPACE_BEGIN @@ -27,19 +29,12 @@ CollationSettings::CollationSettings(const CollationSettings &other) : SharedObject(other), options(other.options), variableTop(other.variableTop), reorderTable(NULL), + minHighNoReorder(other.minHighNoReorder), + reorderRanges(NULL), reorderRangesLength(0), reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), fastLatinOptions(other.fastLatinOptions) { - int32_t length = other.reorderCodesLength; - if(length == 0) { - U_ASSERT(other.reorderTable == NULL); - } else { - U_ASSERT(other.reorderTable != NULL); - if(other.reorderCodesCapacity == 0) { - aliasReordering(other.reorderCodes, length, other.reorderTable); - } else { - setReordering(other.reorderCodes, length, other.reorderTable); - } - } + UErrorCode errorCode = U_ZERO_ERROR; + copyReorderingFrom(other, errorCode); if(fastLatinOptions >= 0) { uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries)); } @@ -79,14 +74,22 @@ CollationSettings::resetReordering() { // rather than a no-op permutation. // Keep the memory via reorderCodes and its capacity. reorderTable = NULL; + minHighNoReorder = 0; + reorderRangesLength = 0; reorderCodesLength = 0; } void -CollationSettings::aliasReordering(const int32_t *codes, int32_t length, const uint8_t *table) { - if(length == 0) { - resetReordering(); - } else { +CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length, + const uint32_t *ranges, int32_t rangesLength, + const uint8_t *table, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return; } + if(table != NULL && + (rangesLength == 0 ? + !reorderTableHasSplitBytes(table) : + rangesLength >= 2 && + // The first offset must be 0. The last offset must not be 0. + (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) { // We need to release the memory before setting the alias pointer. if(reorderCodesCapacity != 0) { uprv_free(const_cast(reorderCodes)); @@ -95,36 +98,170 @@ CollationSettings::aliasReordering(const int32_t *codes, int32_t length, const u reorderTable = table; reorderCodes = codes; reorderCodesLength = length; + // Drop ranges before the first split byte. They are reordered by the table. + // This then speeds up reordering of the remaining ranges. + int32_t firstSplitByteRangeIndex = 0; + while(firstSplitByteRangeIndex < rangesLength && + (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) { + // The second byte of the primary limit is 0. + ++firstSplitByteRangeIndex; + } + if(firstSplitByteRangeIndex == rangesLength) { + U_ASSERT(!reorderTableHasSplitBytes(table)); + minHighNoReorder = 0; + reorderRanges = NULL; + reorderRangesLength = 0; + } else { + U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0); + minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; + reorderRanges = ranges + firstSplitByteRangeIndex; + reorderRangesLength = rangesLength - firstSplitByteRangeIndex; + } + return; } + // Regenerate missing data. + setReordering(data, codes, length, errorCode); } -UBool -CollationSettings::setReordering(const int32_t *codes, int32_t length, const uint8_t table[256]) { - if(length == 0) { +void +CollationSettings::setReordering(const CollationData &data, + const int32_t *codes, int32_t codesLength, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return; } + if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) { resetReordering(); - } else { - uint8_t *ownedTable; - int32_t *ownedCodes; - if(length <= reorderCodesCapacity) { - ownedTable = const_cast(reorderTable); - ownedCodes = const_cast(reorderCodes); - } else { - // Allocate one memory block for the codes and the 16-aligned table. - int32_t capacity = (length + 3) & ~3; // round up to a multiple of 4 ints - uint8_t *bytes = (uint8_t *)uprv_malloc(256 + capacity * 4); - if(bytes == NULL) { return FALSE; } - if(reorderCodesCapacity != 0) { - uprv_free(const_cast(reorderCodes)); + return; + } + UVector32 rangesList(errorCode); + data.makeReorderRanges(codes, codesLength, rangesList, errorCode); + if(U_FAILURE(errorCode)) { return; } + int32_t rangesLength = rangesList.size(); + if(rangesLength == 0) { + resetReordering(); + return; + } + const uint32_t *ranges = reinterpret_cast(rangesList.getBuffer()); + // ranges[] contains at least two (limit, offset) pairs. + // The first offset must be 0. The last offset must not be 0. + // Separators (at the low end) and trailing weights (at the high end) + // are never reordered. + U_ASSERT(rangesLength >= 2); + U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0); + minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; + + // Write the lead byte permutation table. + // Set a 0 for each lead byte that has a range boundary in the middle. + uint8_t table[256]; + int32_t b = 0; + int32_t firstSplitByteRangeIndex = -1; + for(int32_t i = 0; i < rangesLength; ++i) { + uint32_t pair = ranges[i]; + int32_t limit1 = (int32_t)(pair >> 24); + while(b < limit1) { + table[b] = (uint8_t)(b + pair); + ++b; + } + // Check the second byte of the limit. + if((pair & 0xff0000) != 0) { + table[limit1] = 0; + b = limit1 + 1; + if(firstSplitByteRangeIndex < 0) { + firstSplitByteRangeIndex = i; } - reorderTable = ownedTable = bytes + capacity * 4; - reorderCodes = ownedCodes = (int32_t *)bytes; - reorderCodesCapacity = capacity; } - uprv_memcpy(ownedTable, table, 256); - uprv_memcpy(ownedCodes, codes, length * 4); - reorderCodesLength = length; } - return TRUE; + while(b <= 0xff) { + table[b] = (uint8_t)b; + ++b; + } + if(firstSplitByteRangeIndex < 0) { + // The lead byte permutation table alone suffices for reordering. + rangesLength = 0; + } else { + // Remove the ranges below the first split byte. + ranges += firstSplitByteRangeIndex; + rangesLength -= firstSplitByteRangeIndex; + } + setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode); +} + +void +CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength, + const uint32_t *ranges, int32_t rangesLength, + const uint8_t *table, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return; } + int32_t *ownedCodes; + int32_t totalLength = codesLength + rangesLength; + U_ASSERT(totalLength > 0); + if(totalLength <= reorderCodesCapacity) { + ownedCodes = const_cast(reorderCodes); + } else { + // Allocate one memory block for the codes, the ranges, and the 16-aligned table. + int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints + ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256); + if(ownedCodes == NULL) { + resetReordering(); + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + if(reorderCodesCapacity != 0) { + uprv_free(const_cast(reorderCodes)); + } + reorderCodes = ownedCodes; + reorderCodesCapacity = capacity; + } + uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256); + uprv_memcpy(ownedCodes, codes, codesLength * 4); + uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4); + reorderTable = reinterpret_cast(reorderCodes + reorderCodesCapacity); + reorderCodesLength = codesLength; + reorderRanges = reinterpret_cast(ownedCodes) + codesLength; + reorderRangesLength = rangesLength; +} + +void +CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return; } + if(!other.hasReordering()) { + resetReordering(); + return; + } + minHighNoReorder = other.minHighNoReorder; + if(other.reorderCodesCapacity == 0) { + // The reorder arrays are aliased to memory-mapped data. + reorderTable = other.reorderTable; + reorderRanges = other.reorderRanges; + reorderRangesLength = other.reorderRangesLength; + reorderCodes = other.reorderCodes; + reorderCodesLength = other.reorderCodesLength; + } else { + setReorderArrays(other.reorderCodes, other.reorderCodesLength, + other.reorderRanges, other.reorderRangesLength, + other.reorderTable, errorCode); + } +} + +UBool +CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) { + U_ASSERT(table[0] == 0); + for(int32_t i = 1; i < 256; ++i) { + if(table[i] == 0) { + return TRUE; + } + } + return FALSE; +} + +uint32_t +CollationSettings::reorderEx(uint32_t p) const { + if(p >= minHighNoReorder) { return p; } + // Round up p so that its lower 16 bits are >= any offset bits. + // Then compare q directly with (limit, offset) pairs. + uint32_t q = p | 0xffff; + uint32_t r; + const uint32_t *ranges = reorderRanges; + while(q >= (r = *ranges)) { ++ranges; } + return p + (r << 24); } void