]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/i18n/collationsettings.cpp
ICU-551.24.tar.gz
[apple/icu.git] / icuSources / i18n / collationsettings.cpp
index b63d6bf3a604f320d669f53ec7bcb63ba2d1ae49..e3e76a476588b99262715f6b2310184886e27e8f 100644 (file)
@@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 2013-2014, International Business Machines
+* Copyright (C) 2013-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * collationsettings.cpp
 #include "unicode/ucol.h"
 #include "cmemory.h"
 #include "collation.h"
+#include "collationdata.h"
 #include "collationsettings.h"
 #include "sharedobject.h"
 #include "uassert.h"
 #include "umutex.h"
+#include "uvectr32.h"
 
 U_NAMESPACE_BEGIN
 
@@ -27,19 +29,12 @@ CollationSettings::CollationSettings(const CollationSettings &other)
         : SharedObject(other),
           options(other.options), variableTop(other.variableTop),
           reorderTable(NULL),
+          minHighNoReorder(other.minHighNoReorder),
+          reorderRanges(NULL), reorderRangesLength(0),
           reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
           fastLatinOptions(other.fastLatinOptions) {
-    int32_t length = other.reorderCodesLength;
-    if(length == 0) {
-        U_ASSERT(other.reorderTable == NULL);
-    } else {
-        U_ASSERT(other.reorderTable != NULL);
-        if(other.reorderCodesCapacity == 0) {
-            aliasReordering(other.reorderCodes, length, other.reorderTable);
-        } else {
-            setReordering(other.reorderCodes, length, other.reorderTable);
-        }
-    }
+    UErrorCode errorCode = U_ZERO_ERROR;
+    copyReorderingFrom(other, errorCode);
     if(fastLatinOptions >= 0) {
         uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
     }
@@ -79,14 +74,22 @@ CollationSettings::resetReordering() {
     // rather than a no-op permutation.
     // Keep the memory via reorderCodes and its capacity.
     reorderTable = NULL;
+    minHighNoReorder = 0;
+    reorderRangesLength = 0;
     reorderCodesLength = 0;
 }
 
 void
-CollationSettings::aliasReordering(const int32_t *codes, int32_t length, const uint8_t *table) {
-    if(length == 0) {
-        resetReordering();
-    } else {
+CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
+                                   const uint32_t *ranges, int32_t rangesLength,
+                                   const uint8_t *table, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return; }
+    if(table != NULL &&
+            (rangesLength == 0 ?
+                    !reorderTableHasSplitBytes(table) :
+                    rangesLength >= 2 &&
+                    // The first offset must be 0. The last offset must not be 0.
+                    (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
         // We need to release the memory before setting the alias pointer.
         if(reorderCodesCapacity != 0) {
             uprv_free(const_cast<int32_t *>(reorderCodes));
@@ -95,36 +98,170 @@ CollationSettings::aliasReordering(const int32_t *codes, int32_t length, const u
         reorderTable = table;
         reorderCodes = codes;
         reorderCodesLength = length;
+        // Drop ranges before the first split byte. They are reordered by the table.
+        // This then speeds up reordering of the remaining ranges.
+        int32_t firstSplitByteRangeIndex = 0;
+        while(firstSplitByteRangeIndex < rangesLength &&
+                (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
+            // The second byte of the primary limit is 0.
+            ++firstSplitByteRangeIndex;
+        }
+        if(firstSplitByteRangeIndex == rangesLength) {
+            U_ASSERT(!reorderTableHasSplitBytes(table));
+            minHighNoReorder = 0;
+            reorderRanges = NULL;
+            reorderRangesLength = 0;
+        } else {
+            U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
+            minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
+            reorderRanges = ranges + firstSplitByteRangeIndex;
+            reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
+        }
+        return;
     }
+    // Regenerate missing data.
+    setReordering(data, codes, length, errorCode);
 }
 
-UBool
-CollationSettings::setReordering(const int32_t *codes, int32_t length, const uint8_t table[256]) {
-    if(length == 0) {
+void
+CollationSettings::setReordering(const CollationData &data,
+                                 const int32_t *codes, int32_t codesLength,
+                                 UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return; }
+    if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
         resetReordering();
-    } else {
-        uint8_t *ownedTable;
-        int32_t *ownedCodes;
-        if(length <= reorderCodesCapacity) {
-            ownedTable = const_cast<uint8_t *>(reorderTable);
-            ownedCodes = const_cast<int32_t *>(reorderCodes);
-        } else {
-            // Allocate one memory block for the codes and the 16-aligned table.
-            int32_t capacity = (length + 3) & ~3;  // round up to a multiple of 4 ints
-            uint8_t *bytes = (uint8_t *)uprv_malloc(256 + capacity * 4);
-            if(bytes == NULL) { return FALSE; }
-            if(reorderCodesCapacity != 0) {
-                uprv_free(const_cast<int32_t *>(reorderCodes));
+        return;
+    }
+    UVector32 rangesList(errorCode);
+    data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
+    if(U_FAILURE(errorCode)) { return; }
+    int32_t rangesLength = rangesList.size();
+    if(rangesLength == 0) {
+        resetReordering();
+        return;
+    }
+    const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
+    // ranges[] contains at least two (limit, offset) pairs.
+    // The first offset must be 0. The last offset must not be 0.
+    // Separators (at the low end) and trailing weights (at the high end)
+    // are never reordered.
+    U_ASSERT(rangesLength >= 2);
+    U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
+    minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
+
+    // Write the lead byte permutation table.
+    // Set a 0 for each lead byte that has a range boundary in the middle.
+    uint8_t table[256];
+    int32_t b = 0;
+    int32_t firstSplitByteRangeIndex = -1;
+    for(int32_t i = 0; i < rangesLength; ++i) {
+        uint32_t pair = ranges[i];
+        int32_t limit1 = (int32_t)(pair >> 24);
+        while(b < limit1) {
+            table[b] = (uint8_t)(b + pair);
+            ++b;
+        }
+        // Check the second byte of the limit.
+        if((pair & 0xff0000) != 0) {
+            table[limit1] = 0;
+            b = limit1 + 1;
+            if(firstSplitByteRangeIndex < 0) {
+                firstSplitByteRangeIndex = i;
             }
-            reorderTable = ownedTable = bytes + capacity * 4;
-            reorderCodes = ownedCodes = (int32_t *)bytes;
-            reorderCodesCapacity = capacity;
         }
-        uprv_memcpy(ownedTable, table, 256);
-        uprv_memcpy(ownedCodes, codes, length * 4);
-        reorderCodesLength = length;
     }
-    return TRUE;
+    while(b <= 0xff) {
+        table[b] = (uint8_t)b;
+        ++b;
+    }
+    if(firstSplitByteRangeIndex < 0) {
+        // The lead byte permutation table alone suffices for reordering.
+        rangesLength = 0;
+    } else {
+        // Remove the ranges below the first split byte.
+        ranges += firstSplitByteRangeIndex;
+        rangesLength -= firstSplitByteRangeIndex;
+    }
+    setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
+}
+
+void
+CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
+                                    const uint32_t *ranges, int32_t rangesLength,
+                                    const uint8_t *table, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return; }
+    int32_t *ownedCodes;
+    int32_t totalLength = codesLength + rangesLength;
+    U_ASSERT(totalLength > 0);
+    if(totalLength <= reorderCodesCapacity) {
+        ownedCodes = const_cast<int32_t *>(reorderCodes);
+    } else {
+        // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
+        int32_t capacity = (totalLength + 3) & ~3;  // round up to a multiple of 4 ints
+        ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256);
+        if(ownedCodes == NULL) {
+            resetReordering();
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        if(reorderCodesCapacity != 0) {
+            uprv_free(const_cast<int32_t *>(reorderCodes));
+        }
+        reorderCodes = ownedCodes;
+        reorderCodesCapacity = capacity;
+    }
+    uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
+    uprv_memcpy(ownedCodes, codes, codesLength * 4);
+    uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
+    reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
+    reorderCodesLength = codesLength;
+    reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
+    reorderRangesLength = rangesLength;
+}
+
+void
+CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return; }
+    if(!other.hasReordering()) {
+        resetReordering();
+        return;
+    }
+    minHighNoReorder = other.minHighNoReorder;
+    if(other.reorderCodesCapacity == 0) {
+        // The reorder arrays are aliased to memory-mapped data.
+        reorderTable = other.reorderTable;
+        reorderRanges = other.reorderRanges;
+        reorderRangesLength = other.reorderRangesLength;
+        reorderCodes = other.reorderCodes;
+        reorderCodesLength = other.reorderCodesLength;
+    } else {
+        setReorderArrays(other.reorderCodes, other.reorderCodesLength,
+                         other.reorderRanges, other.reorderRangesLength,
+                         other.reorderTable, errorCode);
+    }
+}
+
+UBool
+CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
+    U_ASSERT(table[0] == 0);
+    for(int32_t i = 1; i < 256; ++i) {
+        if(table[i] == 0) {
+            return TRUE;
+        }
+    }
+    return FALSE;
+}
+
+uint32_t
+CollationSettings::reorderEx(uint32_t p) const {
+    if(p >= minHighNoReorder) { return p; }
+    // Round up p so that its lower 16 bits are >= any offset bits.
+    // Then compare q directly with (limit, offset) pairs.
+    uint32_t q = p | 0xffff;
+    uint32_t r;
+    const uint32_t *ranges = reorderRanges;
+    while(q >= (r = *ranges)) { ++ranges; }
+    return p + (r << 24);
 }
 
 void