2 **********************************************************************
3 * Copyright (C) 2008-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
8 #include "unicode/utypes.h"
9 #include "unicode/uspoof.h"
10 #include "unicode/uchar.h"
11 #include "unicode/uniset.h"
12 #include "unicode/utf16.h"
16 #include "identifier_info.h"
17 #include "scriptset.h"
21 #include "uspoof_impl.h"
23 #if !UCONFIG_NO_NORMALIZATION
28 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SpoofImpl
)
30 SpoofImpl::SpoofImpl(SpoofData
*data
, UErrorCode
&status
) :
31 fMagic(0), fChecks(USPOOF_ALL_CHECKS
), fSpoofData(NULL
), fAllowedCharsSet(NULL
) ,
32 fAllowedLocales(NULL
), fCachedIdentifierInfo(NULL
) {
33 if (U_FAILURE(status
)) {
37 fRestrictionLevel
= USPOOF_HIGHLY_RESTRICTIVE
;
39 UnicodeSet
*allowedCharsSet
= new UnicodeSet(0, 0x10ffff);
40 allowedCharsSet
->freeze();
41 fAllowedCharsSet
= allowedCharsSet
;
42 fAllowedLocales
= uprv_strdup("");
43 if (fAllowedCharsSet
== NULL
|| fAllowedLocales
== NULL
) {
44 status
= U_MEMORY_ALLOCATION_ERROR
;
47 fMagic
= USPOOF_MAGIC
;
51 SpoofImpl::SpoofImpl() :
52 fMagic(USPOOF_MAGIC
), fChecks(USPOOF_ALL_CHECKS
), fSpoofData(NULL
), fAllowedCharsSet(NULL
) ,
53 fAllowedLocales(NULL
), fCachedIdentifierInfo(NULL
) {
54 UnicodeSet
*allowedCharsSet
= new UnicodeSet(0, 0x10ffff);
55 allowedCharsSet
->freeze();
56 fAllowedCharsSet
= allowedCharsSet
;
57 fAllowedLocales
= uprv_strdup("");
58 fRestrictionLevel
= USPOOF_HIGHLY_RESTRICTIVE
;
62 // Copy Constructor, used by the user level clone() function.
63 SpoofImpl::SpoofImpl(const SpoofImpl
&src
, UErrorCode
&status
) :
64 fMagic(0), fChecks(USPOOF_ALL_CHECKS
), fSpoofData(NULL
), fAllowedCharsSet(NULL
) ,
65 fAllowedLocales(NULL
), fCachedIdentifierInfo(NULL
) {
66 if (U_FAILURE(status
)) {
70 fChecks
= src
.fChecks
;
71 if (src
.fSpoofData
!= NULL
) {
72 fSpoofData
= src
.fSpoofData
->addReference();
74 fAllowedCharsSet
= static_cast<const UnicodeSet
*>(src
.fAllowedCharsSet
->clone());
75 if (fAllowedCharsSet
== NULL
) {
76 status
= U_MEMORY_ALLOCATION_ERROR
;
78 fAllowedLocales
= uprv_strdup(src
.fAllowedLocales
);
79 fRestrictionLevel
= src
.fRestrictionLevel
;
82 SpoofImpl::~SpoofImpl() {
83 fMagic
= 0; // head off application errors by preventing use of
84 // of deleted objects.
85 if (fSpoofData
!= NULL
) {
86 fSpoofData
->removeReference(); // Will delete if refCount goes to zero.
88 delete fAllowedCharsSet
;
89 uprv_free((void *)fAllowedLocales
);
90 delete fCachedIdentifierInfo
;
94 // Incoming parameter check on Status and the SpoofChecker object
95 // received from the C API.
97 const SpoofImpl
*SpoofImpl::validateThis(const USpoofChecker
*sc
, UErrorCode
&status
) {
98 if (U_FAILURE(status
)) {
102 status
= U_ILLEGAL_ARGUMENT_ERROR
;
105 SpoofImpl
*This
= (SpoofImpl
*)sc
;
106 if (This
->fMagic
!= USPOOF_MAGIC
||
107 This
->fSpoofData
== NULL
) {
108 status
= U_INVALID_FORMAT_ERROR
;
111 if (!SpoofData::validateDataVersion(This
->fSpoofData
->fRawData
, status
)) {
117 SpoofImpl
*SpoofImpl::validateThis(USpoofChecker
*sc
, UErrorCode
&status
) {
118 return const_cast<SpoofImpl
*>
119 (SpoofImpl::validateThis(const_cast<const USpoofChecker
*>(sc
), status
));
124 //--------------------------------------------------------------------------------------
126 // confusableLookup() This is the heart of the confusable skeleton generation
129 // Given a source character, produce the corresponding
130 // replacement character(s), appending them to the dest string.
132 //---------------------------------------------------------------------------------------
133 int32_t SpoofImpl::confusableLookup(UChar32 inChar
, int32_t tableMask
, UnicodeString
&dest
) const {
135 // Binary search the spoof data key table for the inChar
136 int32_t *low
= fSpoofData
->fCFUKeys
;
138 int32_t *limit
= low
+ fSpoofData
->fRawData
->fCFUKeysSize
;
141 int32_t delta
= ((int32_t)(limit
-low
))/2;
143 midc
= *mid
& 0x1fffff;
144 if (inChar
== midc
) {
146 } else if (inChar
< midc
) {
151 } while (low
< limit
-1);
153 midc
= *mid
& 0x1fffff;
154 if (inChar
!= midc
) {
155 // Char not found. It maps to itself.
161 int32_t keyFlags
= *mid
& 0xff000000;
162 if ((keyFlags
& tableMask
) == 0) {
163 // We found the right key char, but the entry doesn't pertain to the
164 // table we need. See if there is an adjacent key that does
165 if (keyFlags
& USPOOF_KEY_MULTIPLE_VALUES
) {
167 for (altMid
= mid
-1; (*altMid
&0x00ffffff) == inChar
; altMid
--) {
168 keyFlags
= *altMid
& 0xff000000;
169 if (keyFlags
& tableMask
) {
174 for (altMid
= mid
+1; (*altMid
&0x00ffffff) == inChar
; altMid
++) {
175 keyFlags
= *altMid
& 0xff000000;
176 if (keyFlags
& tableMask
) {
182 // No key entry for this char & table.
183 // The input char maps to itself.
190 int32_t stringLen
= USPOOF_KEY_LENGTH_FIELD(keyFlags
) + 1;
191 int32_t keyTableIndex
= (int32_t)(mid
- fSpoofData
->fCFUKeys
);
193 // Value is either a UChar (for strings of length 1) or
194 // an index into the string table (for longer strings)
195 uint16_t value
= fSpoofData
->fCFUValues
[keyTableIndex
];
196 if (stringLen
== 1) {
197 dest
.append((UChar
)value
);
201 // String length of 4 from the above lookup is used for all strings of length >= 4.
202 // For these, get the real length from the string lengths table,
203 // which maps string table indexes to lengths.
204 // All strings of the same length are stored contiguously in the string table.
205 // 'value' from the lookup above is the starting index for the desired string.
208 if (stringLen
== 4) {
209 int32_t stringLengthsLimit
= fSpoofData
->fRawData
->fCFUStringLengthsSize
;
210 for (ix
= 0; ix
< stringLengthsLimit
; ix
++) {
211 if (fSpoofData
->fCFUStringLengths
[ix
].fLastString
>= value
) {
212 stringLen
= fSpoofData
->fCFUStringLengths
[ix
].fStrLength
;
216 U_ASSERT(ix
< stringLengthsLimit
);
219 U_ASSERT(value
+ stringLen
<= fSpoofData
->fRawData
->fCFUStringTableLen
);
220 UChar
*src
= &fSpoofData
->fCFUStrings
[value
];
221 dest
.append(src
, stringLen
);
226 //---------------------------------------------------------------------------------------
228 // wholeScriptCheck()
230 // Input text is already normalized to NFD
231 // Return the set of scripts, each of which can represent something that is
232 // confusable with the input text. The script of the input text
233 // is included; input consisting of characters from a single script will
234 // always produce a result consisting of a set containing that script.
236 //---------------------------------------------------------------------------------------
237 void SpoofImpl::wholeScriptCheck(
238 const UnicodeString
&text
, ScriptSet
*result
, UErrorCode
&status
) const {
241 (fChecks
& USPOOF_ANY_CASE
) ? fSpoofData
->fAnyCaseTrie
: fSpoofData
->fLowerCaseTrie
;
243 int32_t length
= text
.length();
244 for (int32_t inputIdx
=0; inputIdx
< length
;) {
245 UChar32 c
= text
.char32At(inputIdx
);
246 inputIdx
+= U16_LENGTH(c
);
247 uint32_t index
= utrie2_get32(table
, c
);
249 // No confusables in another script for this char.
250 // TODO: we should change the data to have sets with just the single script
251 // bit for the script of this char. Gets rid of this special case.
252 // Until then, grab the script from the char and intersect it with the set.
253 UScriptCode cpScript
= uscript_getScript(c
, &status
);
254 U_ASSERT(cpScript
> USCRIPT_INHERITED
);
255 result
->intersect(cpScript
, status
);
256 } else if (index
== 1) {
257 // Script == Common or Inherited. Nothing to do.
259 result
->intersect(fSpoofData
->fScriptSets
[index
]);
265 void SpoofImpl::setAllowedLocales(const char *localesList
, UErrorCode
&status
) {
266 UnicodeSet allowedChars
;
267 UnicodeSet
*tmpSet
= NULL
;
268 const char *locStart
= localesList
;
269 const char *locEnd
= NULL
;
270 const char *localesListEnd
= localesList
+ uprv_strlen(localesList
);
271 int32_t localeListCount
= 0; // Number of locales provided by caller.
273 // Loop runs once per locale from the localesList, a comma separated list of locales.
275 locEnd
= uprv_strchr(locStart
, ',');
276 if (locEnd
== NULL
) {
277 locEnd
= localesListEnd
;
279 while (*locStart
== ' ') {
282 const char *trimmedEnd
= locEnd
-1;
283 while (trimmedEnd
> locStart
&& *trimmedEnd
== ' ') {
286 if (trimmedEnd
<= locStart
) {
289 const char *locale
= uprv_strndup(locStart
, (int32_t)(trimmedEnd
+ 1 - locStart
));
292 // We have one locale from the locales list.
293 // Add the script chars for this locale to the accumulating set of allowed chars.
294 // If the locale is no good, we will be notified back via status.
295 addScriptChars(locale
, &allowedChars
, status
);
296 uprv_free((void *)locale
);
297 if (U_FAILURE(status
)) {
300 locStart
= locEnd
+ 1;
301 } while (locStart
< localesListEnd
);
303 // If our caller provided an empty list of locales, we disable the allowed characters checking
304 if (localeListCount
== 0) {
305 uprv_free((void *)fAllowedLocales
);
306 fAllowedLocales
= uprv_strdup("");
307 tmpSet
= new UnicodeSet(0, 0x10ffff);
308 if (fAllowedLocales
== NULL
|| tmpSet
== NULL
) {
309 status
= U_MEMORY_ALLOCATION_ERROR
;
313 delete fAllowedCharsSet
;
314 fAllowedCharsSet
= tmpSet
;
315 fChecks
&= ~USPOOF_CHAR_LIMIT
;
320 // Add all common and inherited characters to the set of allowed chars.
322 tempSet
.applyIntPropertyValue(UCHAR_SCRIPT
, USCRIPT_COMMON
, status
);
323 allowedChars
.addAll(tempSet
);
324 tempSet
.applyIntPropertyValue(UCHAR_SCRIPT
, USCRIPT_INHERITED
, status
);
325 allowedChars
.addAll(tempSet
);
327 // If anything went wrong, we bail out without changing
328 // the state of the spoof checker.
329 if (U_FAILURE(status
)) {
333 // Store the updated spoof checker state.
334 tmpSet
= static_cast<UnicodeSet
*>(allowedChars
.clone());
335 const char *tmpLocalesList
= uprv_strdup(localesList
);
336 if (tmpSet
== NULL
|| tmpLocalesList
== NULL
) {
337 status
= U_MEMORY_ALLOCATION_ERROR
;
340 uprv_free((void *)fAllowedLocales
);
341 fAllowedLocales
= tmpLocalesList
;
343 delete fAllowedCharsSet
;
344 fAllowedCharsSet
= tmpSet
;
345 fChecks
|= USPOOF_CHAR_LIMIT
;
349 const char * SpoofImpl::getAllowedLocales(UErrorCode
&/*status*/) {
350 return fAllowedLocales
;
354 // Given a locale (a language), add all the characters from all of the scripts used with that language
355 // to the allowedChars UnicodeSet
357 void SpoofImpl::addScriptChars(const char *locale
, UnicodeSet
*allowedChars
, UErrorCode
&status
) {
358 UScriptCode scripts
[30];
360 int32_t numScripts
= uscript_getCode(locale
, scripts
, sizeof(scripts
)/sizeof(UScriptCode
), &status
);
361 if (U_FAILURE(status
)) {
364 if (status
== U_USING_DEFAULT_WARNING
) {
365 status
= U_ILLEGAL_ARGUMENT_ERROR
;
370 for (i
=0; i
<numScripts
; i
++) {
371 tmpSet
.applyIntPropertyValue(UCHAR_SCRIPT
, scripts
[i
], status
);
372 allowedChars
->addAll(tmpSet
);
377 // Convert a text format hex number. Utility function used by builder code. Static.
378 // Input: UChar *string text. Output: a UChar32
379 // Input has been pre-checked, and will have no non-hex chars.
380 // The number must fall in the code point range of 0..0x10ffff
382 UChar32
SpoofImpl::ScanHex(const UChar
*s
, int32_t start
, int32_t limit
, UErrorCode
&status
) {
383 if (U_FAILURE(status
)) {
386 U_ASSERT(limit
-start
> 0);
389 for (i
=start
; i
<limit
; i
++) {
390 int digitVal
= s
[i
] - 0x30;
392 digitVal
= 0xa + (s
[i
] - 0x41); // Upper Case 'A'
395 digitVal
= 0xa + (s
[i
] - 0x61); // Lower Case 'a'
397 U_ASSERT(digitVal
<= 0xf);
401 if (val
> 0x10ffff) {
402 status
= U_PARSE_ERROR
;
408 // IdentifierInfo Cache. IdentifierInfo objects are somewhat expensive to create.
409 // Maintain a one-element cache, which is sufficient to avoid repeatedly
410 // creating new ones unless we get multi-thread concurrency in spoof
411 // check operations, which should be statistically uncommon.
413 // These functions are used in place of new & delete of an IdentifierInfo.
414 // They will recycle the IdentifierInfo when possible.
415 // They are logically const, and used within const functions that must be thread safe.
416 IdentifierInfo
*SpoofImpl::getIdentifierInfo(UErrorCode
&status
) const {
417 IdentifierInfo
*returnIdInfo
= NULL
;
418 if (U_FAILURE(status
)) {
421 SpoofImpl
*nonConstThis
= const_cast<SpoofImpl
*>(this);
424 returnIdInfo
= nonConstThis
->fCachedIdentifierInfo
;
425 nonConstThis
->fCachedIdentifierInfo
= NULL
;
427 if (returnIdInfo
== NULL
) {
428 returnIdInfo
= new IdentifierInfo(status
);
429 if (U_SUCCESS(status
) && returnIdInfo
== NULL
) {
430 status
= U_MEMORY_ALLOCATION_ERROR
;
432 if (U_FAILURE(status
) && returnIdInfo
!= NULL
) {
441 void SpoofImpl::releaseIdentifierInfo(IdentifierInfo
*idInfo
) const {
442 if (idInfo
!= NULL
) {
443 SpoofImpl
*nonConstThis
= const_cast<SpoofImpl
*>(this);
446 if (nonConstThis
->fCachedIdentifierInfo
== NULL
) {
447 nonConstThis
->fCachedIdentifierInfo
= idInfo
;
458 //----------------------------------------------------------------------------------------------
460 // class SpoofData Implementation
462 //----------------------------------------------------------------------------------------------
465 UBool
SpoofData::validateDataVersion(const SpoofDataHeader
*rawData
, UErrorCode
&status
) {
466 if (U_FAILURE(status
) ||
468 rawData
->fMagic
!= USPOOF_MAGIC
||
469 rawData
->fFormatVersion
[0] > 1 ||
470 rawData
->fFormatVersion
[1] > 0) {
471 status
= U_INVALID_FORMAT_ERROR
;
477 static UBool U_CALLCONV
478 spoofDataIsAcceptable(void *context
,
479 const char * /* type */, const char * /*name*/,
480 const UDataInfo
*pInfo
) {
483 pInfo
->isBigEndian
== U_IS_BIG_ENDIAN
&&
484 pInfo
->charsetFamily
== U_CHARSET_FAMILY
&&
485 pInfo
->dataFormat
[0] == 0x43 && // dataFormat="Cfu "
486 pInfo
->dataFormat
[1] == 0x66 &&
487 pInfo
->dataFormat
[2] == 0x75 &&
488 pInfo
->dataFormat
[3] == 0x20 &&
489 pInfo
->formatVersion
[0] == 1
491 UVersionInfo
*version
= static_cast<UVersionInfo
*>(context
);
492 if(version
!= NULL
) {
493 uprv_memcpy(version
, pInfo
->dataVersion
, 4);
502 // SpoofData::getDefault() - return a wrapper around the spoof data that is
503 // baked into the default ICU data.
505 SpoofData
*SpoofData::getDefault(UErrorCode
&status
) {
506 // TODO: Cache it. Lazy create, keep until cleanup.
508 UDataMemory
*udm
= udata_openChoice(NULL
, "cfu", "confusables",
509 spoofDataIsAcceptable
,
510 NULL
, // context, would receive dataVersion if supplied.
512 if (U_FAILURE(status
)) {
515 SpoofData
*This
= new SpoofData(udm
, status
);
516 if (U_FAILURE(status
)) {
521 status
= U_MEMORY_ALLOCATION_ERROR
;
526 SpoofData::SpoofData(UDataMemory
*udm
, UErrorCode
&status
)
529 if (U_FAILURE(status
)) {
533 // fRawData is non-const because it may be constructed by the data builder.
534 fRawData
= reinterpret_cast<SpoofDataHeader
*>(
535 const_cast<void *>(udata_getMemory(udm
)));
536 validateDataVersion(fRawData
, status
);
541 SpoofData::SpoofData(const void *data
, int32_t length
, UErrorCode
&status
)
544 if (U_FAILURE(status
)) {
547 if ((size_t)length
< sizeof(SpoofDataHeader
)) {
548 status
= U_INVALID_FORMAT_ERROR
;
551 void *ncData
= const_cast<void *>(data
);
552 fRawData
= static_cast<SpoofDataHeader
*>(ncData
);
553 if (length
< fRawData
->fLength
) {
554 status
= U_INVALID_FORMAT_ERROR
;
557 validateDataVersion(fRawData
, status
);
562 // Spoof Data constructor for use from data builder.
563 // Initializes a new, empty data area that will be populated later.
564 SpoofData::SpoofData(UErrorCode
&status
) {
566 if (U_FAILURE(status
)) {
572 // The spoof header should already be sized to be a multiple of 16 bytes.
573 // Just in case it's not, round it up.
574 uint32_t initialSize
= (sizeof(SpoofDataHeader
) + 15) & ~15;
575 U_ASSERT(initialSize
== sizeof(SpoofDataHeader
));
577 fRawData
= static_cast<SpoofDataHeader
*>(uprv_malloc(initialSize
));
578 fMemLimit
= initialSize
;
579 if (fRawData
== NULL
) {
580 status
= U_MEMORY_ALLOCATION_ERROR
;
583 uprv_memset(fRawData
, 0, initialSize
);
585 fRawData
->fMagic
= USPOOF_MAGIC
;
586 fRawData
->fFormatVersion
[0] = 1;
587 fRawData
->fFormatVersion
[1] = 0;
588 fRawData
->fFormatVersion
[2] = 0;
589 fRawData
->fFormatVersion
[3] = 0;
593 // reset() - initialize all fields.
594 // Should be updated if any new fields are added.
595 // Called by constructors to put things in a known initial state.
596 void SpoofData::reset() {
604 fCFUStringLengths
= NULL
;
607 fLowerCaseTrie
= NULL
;
612 // SpoofData::initPtrs()
613 // Initialize the pointers to the various sections of the raw data.
615 // This function is used both during the Trie building process (multiple
616 // times, as the individual data sections are added), and
617 // during the opening of a Spoof Checker from prebuilt data.
619 // The pointers for non-existent data sections (identified by an offset of 0)
622 // Note: During building the data, adding each new data section
623 // reallocs the raw data area, which likely relocates it, which
624 // in turn requires reinitializing all of the pointers into it, hence
625 // multiple calls to this function during building.
627 void SpoofData::initPtrs(UErrorCode
&status
) {
630 fCFUStringLengths
= NULL
;
632 if (U_FAILURE(status
)) {
635 if (fRawData
->fCFUKeys
!= 0) {
636 fCFUKeys
= (int32_t *)((char *)fRawData
+ fRawData
->fCFUKeys
);
638 if (fRawData
->fCFUStringIndex
!= 0) {
639 fCFUValues
= (uint16_t *)((char *)fRawData
+ fRawData
->fCFUStringIndex
);
641 if (fRawData
->fCFUStringLengths
!= 0) {
642 fCFUStringLengths
= (SpoofStringLengthsElement
*)((char *)fRawData
+ fRawData
->fCFUStringLengths
);
644 if (fRawData
->fCFUStringTable
!= 0) {
645 fCFUStrings
= (UChar
*)((char *)fRawData
+ fRawData
->fCFUStringTable
);
648 if (fAnyCaseTrie
== NULL
&& fRawData
->fAnyCaseTrie
!= 0) {
649 fAnyCaseTrie
= utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS
,
650 (char *)fRawData
+ fRawData
->fAnyCaseTrie
, fRawData
->fAnyCaseTrieLength
, NULL
, &status
);
652 if (fLowerCaseTrie
== NULL
&& fRawData
->fLowerCaseTrie
!= 0) {
653 fLowerCaseTrie
= utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS
,
654 (char *)fRawData
+ fRawData
->fLowerCaseTrie
, fRawData
->fLowerCaseTrieLength
, NULL
, &status
);
657 if (fRawData
->fScriptSets
!= 0) {
658 fScriptSets
= (ScriptSet
*)((char *)fRawData
+ fRawData
->fScriptSets
);
663 SpoofData::~SpoofData() {
664 utrie2_close(fAnyCaseTrie
);
666 utrie2_close(fLowerCaseTrie
);
667 fLowerCaseTrie
= NULL
;
679 void SpoofData::removeReference() {
680 if (umtx_atomic_dec(&fRefCount
) == 0) {
686 SpoofData
*SpoofData::addReference() {
687 umtx_atomic_inc(&fRefCount
);
692 void *SpoofData::reserveSpace(int32_t numBytes
, UErrorCode
&status
) {
693 if (U_FAILURE(status
)) {
698 status
= U_INTERNAL_PROGRAM_ERROR
;
702 numBytes
= (numBytes
+ 15) & ~15; // Round up to a multiple of 16
703 uint32_t returnOffset
= fMemLimit
;
704 fMemLimit
+= numBytes
;
705 fRawData
= static_cast<SpoofDataHeader
*>(uprv_realloc(fRawData
, fMemLimit
));
706 fRawData
->fLength
= fMemLimit
;
707 uprv_memset((char *)fRawData
+ returnOffset
, 0, numBytes
);
709 return (char *)fRawData
+ returnOffset
;
717 //-----------------------------------------------------------------------------
719 // uspoof_swap - byte swap and char encoding swap of spoof data
721 //-----------------------------------------------------------------------------
722 U_CAPI
int32_t U_EXPORT2
723 uspoof_swap(const UDataSwapper
*ds
, const void *inData
, int32_t length
, void *outData
,
724 UErrorCode
*status
) {
726 if (status
== NULL
|| U_FAILURE(*status
)) {
729 if(ds
==NULL
|| inData
==NULL
|| length
<-1 || (length
>0 && outData
==NULL
)) {
730 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
735 // Check that the data header is for spoof data.
736 // (Header contents are defined in gencfu.cpp)
738 const UDataInfo
*pInfo
= (const UDataInfo
*)((const char *)inData
+4);
739 if(!( pInfo
->dataFormat
[0]==0x43 && /* dataFormat="Cfu " */
740 pInfo
->dataFormat
[1]==0x66 &&
741 pInfo
->dataFormat
[2]==0x75 &&
742 pInfo
->dataFormat
[3]==0x20 &&
743 pInfo
->formatVersion
[0]==1 )) {
744 udata_printError(ds
, "uspoof_swap(): data format %02x.%02x.%02x.%02x "
745 "(format version %02x %02x %02x %02x) is not recognized\n",
746 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
747 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
748 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1],
749 pInfo
->formatVersion
[2], pInfo
->formatVersion
[3]);
750 *status
=U_UNSUPPORTED_ERROR
;
755 // Swap the data header. (This is the generic ICU Data Header, not the uspoof Specific
756 // header). This swap also conveniently gets us
757 // the size of the ICU d.h., which lets us locate the start
758 // of the uspoof specific data.
760 int32_t headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, status
);
764 // Get the Spoof Data Header, and check that it appears to be OK.
767 const uint8_t *inBytes
=(const uint8_t *)inData
+headerSize
;
768 SpoofDataHeader
*spoofDH
= (SpoofDataHeader
*)inBytes
;
769 if (ds
->readUInt32(spoofDH
->fMagic
) != USPOOF_MAGIC
||
770 ds
->readUInt32(spoofDH
->fLength
) < sizeof(SpoofDataHeader
))
772 udata_printError(ds
, "uspoof_swap(): Spoof Data header is invalid.\n");
773 *status
=U_UNSUPPORTED_ERROR
;
778 // Preflight operation? Just return the size
780 int32_t spoofDataLength
= ds
->readUInt32(spoofDH
->fLength
);
781 int32_t totalSize
= headerSize
+ spoofDataLength
;
787 // Check that length passed in is consistent with length from Spoof data header.
789 if (length
< totalSize
) {
790 udata_printError(ds
, "uspoof_swap(): too few bytes (%d after ICU Data header) for spoof data.\n",
792 *status
=U_INDEX_OUTOFBOUNDS_ERROR
;
798 // Swap the Data. Do the data itself first, then the Spoof Data Header, because
799 // we need to reference the header to locate the data, and an
800 // inplace swap of the header leaves it unusable.
802 uint8_t *outBytes
= (uint8_t *)outData
+ headerSize
;
803 SpoofDataHeader
*outputDH
= (SpoofDataHeader
*)outBytes
;
805 int32_t sectionStart
;
806 int32_t sectionLength
;
809 // If not swapping in place, zero out the output buffer before starting.
810 // Gaps may exist between the individual sections, and these must be zeroed in
811 // the output buffer. The simplest way to do that is to just zero the whole thing.
813 if (inBytes
!= outBytes
) {
814 uprv_memset(outBytes
, 0, spoofDataLength
);
817 // Confusables Keys Section (fCFUKeys)
818 sectionStart
= ds
->readUInt32(spoofDH
->fCFUKeys
);
819 sectionLength
= ds
->readUInt32(spoofDH
->fCFUKeysSize
) * 4;
820 ds
->swapArray32(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
822 // String Index Section
823 sectionStart
= ds
->readUInt32(spoofDH
->fCFUStringIndex
);
824 sectionLength
= ds
->readUInt32(spoofDH
->fCFUStringIndexSize
) * 2;
825 ds
->swapArray16(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
827 // String Table Section
828 sectionStart
= ds
->readUInt32(spoofDH
->fCFUStringTable
);
829 sectionLength
= ds
->readUInt32(spoofDH
->fCFUStringTableLen
) * 2;
830 ds
->swapArray16(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
832 // String Lengths Section
833 sectionStart
= ds
->readUInt32(spoofDH
->fCFUStringLengths
);
834 sectionLength
= ds
->readUInt32(spoofDH
->fCFUStringLengthsSize
) * 4;
835 ds
->swapArray16(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
838 sectionStart
= ds
->readUInt32(spoofDH
->fAnyCaseTrie
);
839 sectionLength
= ds
->readUInt32(spoofDH
->fAnyCaseTrieLength
);
840 utrie2_swap(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
843 sectionStart
= ds
->readUInt32(spoofDH
->fLowerCaseTrie
);
844 sectionLength
= ds
->readUInt32(spoofDH
->fLowerCaseTrieLength
);
845 utrie2_swap(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
847 // Script Sets. The data is an array of int32_t
848 sectionStart
= ds
->readUInt32(spoofDH
->fScriptSets
);
849 sectionLength
= ds
->readUInt32(spoofDH
->fScriptSetsLength
) * sizeof(ScriptSet
);
850 ds
->swapArray32(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
852 // And, last, swap the header itself.
853 // int32_t fMagic // swap this
854 // uint8_t fFormatVersion[4] // Do not swap this, just copy
855 // int32_t fLength and all the rest // Swap the rest, all is 32 bit stuff.
857 uint32_t magic
= ds
->readUInt32(spoofDH
->fMagic
);
858 ds
->writeUInt32((uint32_t *)&outputDH
->fMagic
, magic
);
860 if (outputDH
->fFormatVersion
!= spoofDH
->fFormatVersion
) {
861 uprv_memcpy(outputDH
->fFormatVersion
, spoofDH
->fFormatVersion
, sizeof(spoofDH
->fFormatVersion
));
863 // swap starting at fLength
864 ds
->swapArray32(ds
, &spoofDH
->fLength
, sizeof(SpoofDataHeader
)-8 /* minus magic and fFormatVersion[4] */, &outputDH
->fLength
, status
);