X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..a01113dcd0f39d5da295ef82785beff9ed86fe38:/icuSources/tools/toolutil/swapimpl.cpp?ds=sidebyside diff --git a/icuSources/tools/toolutil/swapimpl.cpp b/icuSources/tools/toolutil/swapimpl.cpp index 639189dd..926755a2 100644 --- a/icuSources/tools/toolutil/swapimpl.cpp +++ b/icuSources/tools/toolutil/swapimpl.cpp @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * -* Copyright (C) 2005-2010, International Business Machines +* Copyright (C) 2005-2014, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: swapimpl.cpp -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -39,6 +41,7 @@ #include "uarrsort.h" #include "ucmndata.h" #include "udataswp.h" +#include "ulayout_props.h" /* swapping implementations in common */ @@ -54,8 +57,9 @@ #include "sprpimpl.h" #include "propname.h" #include "rbbidata.h" -#include "triedict.h" +#include "utrie.h" #include "utrie2.h" +#include "dictionarydata.h" /* swapping implementations in i18n */ @@ -63,14 +67,98 @@ #include "uspoof_impl.h" #endif +U_NAMESPACE_USE /* definitions */ -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) +/* Unicode property (value) aliases data swapping --------------------------- */ + +static int32_t U_CALLCONV +upname_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + /* udata_swapDataHeader checks the arguments */ + int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* check data format and format version */ + const UDataInfo *pInfo= + reinterpret_cast( + static_cast(inData)+4); + if(!( + pInfo->dataFormat[0]==0x70 && /* dataFormat="pnam" */ + pInfo->dataFormat[1]==0x6e && + pInfo->dataFormat[2]==0x61 && + pInfo->dataFormat[3]==0x6d && + pInfo->formatVersion[0]==2 + )) { + udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + const uint8_t *inBytes=static_cast(inData)+headerSize; + uint8_t *outBytes=static_cast(outData)+headerSize; + + if(length>=0) { + length-=headerSize; + // formatVersion 2 initially has indexes[8], 32 bytes. + if(length<32) { + udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n", + (int)length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + + const int32_t *inIndexes=reinterpret_cast(inBytes); + int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]); + if(length>=0) { + if(lengthswapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode); + + // Copy the rest of the data. + if(inBytes!=outBytes) { + uprv_memcpy(outBytes+numBytesIndexesAndValueMaps, + inBytes+numBytesIndexesAndValueMaps, + totalSize-numBytesIndexesAndValueMaps); + } + + // We need not swap anything else: + // + // The ByteTries are already byte-serialized, and are fixed on ASCII. + // (On an EBCDIC machine, the input string is converted to lowercase ASCII + // while matching.) + // + // The name groups are mostly invariant characters, but since we only + // generate, and keep in subversion, ASCII versions of pnames.icu, + // and since only ICU4J uses the pnames.icu data file + // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files, + // we just copy those bytes too. + } + + return headerSize+totalSize; +} /* Unicode properties data swapping ----------------------------------------- */ -U_CAPI int32_t U_EXPORT2 +static int32_t U_CALLCONV uprops_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { @@ -146,7 +234,7 @@ uprops_swap(const UDataSwapper *ds, /* copy everything for inaccessible data (padding) */ if(inData32!=outData32) { - uprv_memcpy(outData32, inData32, 4*dataTop); + uprv_memcpy(outData32, inData32, 4*(size_t)dataTop); } /* swap the indexes[16] */ @@ -156,7 +244,7 @@ uprops_swap(const UDataSwapper *ds, * swap the main properties UTrie * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16)) */ - utrie2_swapAnyVersion(ds, + utrie_swapAnyVersion(ds, inData32+UPROPS_INDEX_COUNT, 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT), outData32+UPROPS_INDEX_COUNT, @@ -187,7 +275,7 @@ uprops_swap(const UDataSwapper *ds, * swap the additional UTrie * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties */ - utrie2_swapAnyVersion(ds, + utrie_swapAnyVersion(ds, inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]), outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], @@ -218,7 +306,7 @@ uprops_swap(const UDataSwapper *ds, /* Unicode case mapping data swapping --------------------------------------- */ -U_CAPI int32_t U_EXPORT2 +static int32_t U_CALLCONV ucase_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { @@ -249,7 +337,7 @@ ucase_swap(const UDataSwapper *ds, ((pInfo->formatVersion[0]==1 && pInfo->formatVersion[2]==UTRIE_SHIFT && pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || - pInfo->formatVersion[0]==2) + (2<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=4)) )) { udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n", pInfo->dataFormat[0], pInfo->dataFormat[1], @@ -304,7 +392,7 @@ ucase_swap(const UDataSwapper *ds, /* swap the UTrie */ count=indexes[UCASE_IX_TRIE_SIZE]; - utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); + utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); offset+=count; /* swap the uint16_t exceptions[] and unfold[] */ @@ -320,7 +408,7 @@ ucase_swap(const UDataSwapper *ds, /* Unicode bidi/shaping data swapping --------------------------------------- */ -U_CAPI int32_t U_EXPORT2 +static int32_t U_CALLCONV ubidi_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { @@ -406,7 +494,7 @@ ubidi_swap(const UDataSwapper *ds, /* swap the UTrie */ count=indexes[UBIDI_IX_TRIE_SIZE]; - utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); + utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); offset+=count; /* swap the uint32_t mirrors[] */ @@ -414,9 +502,11 @@ ubidi_swap(const UDataSwapper *ds, ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode); offset+=count; - /* just skip the uint8_t jgArray[] */ + /* just skip the uint8_t jgArray[] and jgArray2[] */ count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START]; offset+=count; + count=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2]; + offset+=count; U_ASSERT(offset==size); } @@ -428,7 +518,7 @@ ubidi_swap(const UDataSwapper *ds, #if !UCONFIG_NO_NORMALIZATION -U_CAPI int32_t U_EXPORT2 +static int32_t U_CALLCONV unorm_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { @@ -551,8 +641,108 @@ unorm_swap(const UDataSwapper *ds, #endif +// Unicode text layout properties data swapping -------------------------------- + +static int32_t U_CALLCONV +ulayout_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + // udata_swapDataHeader checks the arguments. + int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if (pErrorCode == nullptr || U_FAILURE(*pErrorCode)) { + return 0; + } + + // Check data format and format version. + const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4); + if (!( + pInfo->dataFormat[0] == ULAYOUT_FMT_0 && // dataFormat="Layo" + pInfo->dataFormat[1] == ULAYOUT_FMT_1 && + pInfo->dataFormat[2] == ULAYOUT_FMT_2 && + pInfo->dataFormat[3] == ULAYOUT_FMT_3 && + pInfo->formatVersion[0] == 1)) { + udata_printError(ds, + "ulayout_swap(): data format %02x.%02x.%02x.%02x (format version %02x) " + "is not recognized as text layout properties data\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0]); + *pErrorCode = U_UNSUPPORTED_ERROR; + return 0; + } + + const uint8_t *inBytes = (const uint8_t *)inData + headerSize; + uint8_t *outBytes = (uint8_t *)outData + headerSize; + + const int32_t *inIndexes = (const int32_t *)inBytes; + + if (length >= 0) { + length -= headerSize; + if (length < 12 * 4) { + udata_printError(ds, + "ulayout_swap(): too few bytes (%d after header) for text layout properties data\n", + length); + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + + int32_t indexesLength = udata_readInt32(ds, inIndexes[ULAYOUT_IX_INDEXES_LENGTH]); + if (indexesLength < 12) { + udata_printError(ds, + "ulayout_swap(): too few indexes (%d) for text layout properties data\n", + indexesLength); + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + // Read the data offsets before swapping anything. + int32_t indexes[ULAYOUT_IX_TRIES_TOP + 1]; + for (int32_t i = ULAYOUT_IX_INPC_TRIE_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) { + indexes[i] = udata_readInt32(ds, inIndexes[i]); + } + int32_t size = indexes[ULAYOUT_IX_TRIES_TOP]; + + if (length >= 0) { + if (length < size) { + udata_printError(ds, + "ulayout_swap(): too few bytes (%d after header) " + "for all of text layout properties data\n", + length); + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + // Copy the data for inaccessible bytes. + if (inBytes != outBytes) { + uprv_memcpy(outBytes, inBytes, size); + } + + // Swap the int32_t indexes[]. + int32_t offset = 0; + int32_t count = indexesLength * 4; + ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); + offset += count; + + // Swap each trie. + for (int32_t i = ULAYOUT_IX_INPC_TRIE_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) { + int32_t top = indexes[i]; + count = top - offset; + U_ASSERT(count >= 0); + if (count >= 16) { + utrie_swapAnyVersion(ds, inBytes + offset, count, outBytes + offset, pErrorCode); + } + offset = top; + } + + U_ASSERT(offset == size); + } + + return headerSize + size; +} + /* Swap 'Test' data from gentest */ -U_CAPI int32_t U_EXPORT2 +static int32_t U_CALLCONV test_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode) { @@ -567,7 +757,7 @@ test_swap(const UDataSwapper *ds, /* udata_swapDataHeader checks the arguments */ headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - udata_printError(ds, "test_swap(): data header swap failed %s\n", u_errorName(*pErrorCode)); + udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != NULL ? u_errorName(*pErrorCode) : "pErrorCode is NULL"); return 0; } @@ -642,13 +832,17 @@ static const struct { { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap }, /* dataFormat="Norm" */ { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap }, /* dataFormat="Nrm2" */ #endif + + { { ULAYOUT_FMT_0, ULAYOUT_FMT_1, ULAYOUT_FMT_2, ULAYOUT_FMT_3 }, + ulayout_swap }, // dataFormat="Layo" + #if !UCONFIG_NO_COLLATION { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap }, /* dataFormat="UCol" */ { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */ #endif #if !UCONFIG_NO_BREAK_ITERATION { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap }, /* dataFormat="Brk " */ - { { 0x54, 0x72, 0x44, 0x63 }, triedict_swap }, /* dataFormat="TrDc " */ + { { 0x44, 0x69, 0x63, 0x74 }, udict_swap }, /* dataFormat="Dict" */ #endif { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap }, /* dataFormat="pnam" */ { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames }, /* dataFormat="unam" */ @@ -664,7 +858,7 @@ udata_swap(const UDataSwapper *ds, UErrorCode *pErrorCode) { char dataFormatChars[4]; const UDataInfo *pInfo; - int32_t headerSize, i, swappedLength; + int32_t i, swappedLength; if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { return 0; @@ -677,7 +871,7 @@ udata_swap(const UDataSwapper *ds, * information. Otherwise we would have to pass some of the information * and not be able to use the UDataSwapFn signature. */ - headerSize=udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); + udata_swapDataHeader(ds, inData, -1, NULL, pErrorCode); /* * If we wanted udata_swap() to also handle non-loadable data like a UTrie, @@ -704,7 +898,7 @@ udata_swap(const UDataSwapper *ds, } /* dispatch to the swap function for the dataFormat */ - for(i=0; idataFormat, 4)) { swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);