2 **********************************************************************
3 * Copyright (C) 2008-2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
8 #include "unicode/utypes.h"
9 #include "unicode/uspoof.h"
10 #include "unicode/uchar.h"
11 #include "unicode/uniset.h"
12 #include "unicode/utf16.h"
16 #include "identifier_info.h"
17 #include "scriptset.h"
22 #include "uspoof_impl.h"
24 #if !UCONFIG_NO_NORMALIZATION
29 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SpoofImpl
)
31 SpoofImpl::SpoofImpl(SpoofData
*data
, UErrorCode
&status
) :
32 fMagic(0), fChecks(USPOOF_ALL_CHECKS
), fSpoofData(NULL
), fAllowedCharsSet(NULL
) ,
33 fAllowedLocales(NULL
), fCachedIdentifierInfo(NULL
) {
34 if (U_FAILURE(status
)) {
38 fRestrictionLevel
= USPOOF_HIGHLY_RESTRICTIVE
;
40 UnicodeSet
*allowedCharsSet
= new UnicodeSet(0, 0x10ffff);
41 allowedCharsSet
->freeze();
42 fAllowedCharsSet
= allowedCharsSet
;
43 fAllowedLocales
= uprv_strdup("");
44 if (fAllowedCharsSet
== NULL
|| fAllowedLocales
== NULL
) {
45 status
= U_MEMORY_ALLOCATION_ERROR
;
48 fMagic
= USPOOF_MAGIC
;
52 SpoofImpl::SpoofImpl() :
53 fMagic(USPOOF_MAGIC
), fChecks(USPOOF_ALL_CHECKS
), fSpoofData(NULL
), fAllowedCharsSet(NULL
) ,
54 fAllowedLocales(NULL
), fCachedIdentifierInfo(NULL
) {
55 UnicodeSet
*allowedCharsSet
= new UnicodeSet(0, 0x10ffff);
56 allowedCharsSet
->freeze();
57 fAllowedCharsSet
= allowedCharsSet
;
58 fAllowedLocales
= uprv_strdup("");
59 fRestrictionLevel
= USPOOF_HIGHLY_RESTRICTIVE
;
63 // Copy Constructor, used by the user level clone() function.
64 SpoofImpl::SpoofImpl(const SpoofImpl
&src
, UErrorCode
&status
) :
65 fMagic(0), fChecks(USPOOF_ALL_CHECKS
), fSpoofData(NULL
), fAllowedCharsSet(NULL
) ,
66 fAllowedLocales(NULL
), fCachedIdentifierInfo(NULL
) {
67 if (U_FAILURE(status
)) {
71 fChecks
= src
.fChecks
;
72 if (src
.fSpoofData
!= NULL
) {
73 fSpoofData
= src
.fSpoofData
->addReference();
75 fAllowedCharsSet
= static_cast<const UnicodeSet
*>(src
.fAllowedCharsSet
->clone());
76 if (fAllowedCharsSet
== NULL
) {
77 status
= U_MEMORY_ALLOCATION_ERROR
;
79 fAllowedLocales
= uprv_strdup(src
.fAllowedLocales
);
80 fRestrictionLevel
= src
.fRestrictionLevel
;
83 SpoofImpl::~SpoofImpl() {
84 fMagic
= 0; // head off application errors by preventing use of
85 // of deleted objects.
86 if (fSpoofData
!= NULL
) {
87 fSpoofData
->removeReference(); // Will delete if refCount goes to zero.
89 delete fAllowedCharsSet
;
90 uprv_free((void *)fAllowedLocales
);
91 delete fCachedIdentifierInfo
;
95 // Incoming parameter check on Status and the SpoofChecker object
96 // received from the C API.
98 const SpoofImpl
*SpoofImpl::validateThis(const USpoofChecker
*sc
, UErrorCode
&status
) {
99 if (U_FAILURE(status
)) {
103 status
= U_ILLEGAL_ARGUMENT_ERROR
;
106 SpoofImpl
*This
= (SpoofImpl
*)sc
;
107 if (This
->fMagic
!= USPOOF_MAGIC
||
108 This
->fSpoofData
== NULL
) {
109 status
= U_INVALID_FORMAT_ERROR
;
112 if (!SpoofData::validateDataVersion(This
->fSpoofData
->fRawData
, status
)) {
118 SpoofImpl
*SpoofImpl::validateThis(USpoofChecker
*sc
, UErrorCode
&status
) {
119 return const_cast<SpoofImpl
*>
120 (SpoofImpl::validateThis(const_cast<const USpoofChecker
*>(sc
), status
));
125 //--------------------------------------------------------------------------------------
127 // confusableLookup() This is the heart of the confusable skeleton generation
130 // Given a source character, produce the corresponding
131 // replacement character(s), appending them to the dest string.
133 //---------------------------------------------------------------------------------------
134 int32_t SpoofImpl::confusableLookup(UChar32 inChar
, int32_t tableMask
, UnicodeString
&dest
) const {
136 // Binary search the spoof data key table for the inChar
137 int32_t *low
= fSpoofData
->fCFUKeys
;
139 int32_t *limit
= low
+ fSpoofData
->fRawData
->fCFUKeysSize
;
142 int32_t delta
= ((int32_t)(limit
-low
))/2;
144 midc
= *mid
& 0x1fffff;
145 if (inChar
== midc
) {
147 } else if (inChar
< midc
) {
152 } while (low
< limit
-1);
154 midc
= *mid
& 0x1fffff;
155 if (inChar
!= midc
) {
156 // Char not found. It maps to itself.
162 int32_t keyFlags
= *mid
& 0xff000000;
163 if ((keyFlags
& tableMask
) == 0) {
164 // We found the right key char, but the entry doesn't pertain to the
165 // table we need. See if there is an adjacent key that does
166 if (keyFlags
& USPOOF_KEY_MULTIPLE_VALUES
) {
168 for (altMid
= mid
-1; (*altMid
&0x00ffffff) == inChar
; altMid
--) {
169 keyFlags
= *altMid
& 0xff000000;
170 if (keyFlags
& tableMask
) {
175 for (altMid
= mid
+1; (*altMid
&0x00ffffff) == inChar
; altMid
++) {
176 keyFlags
= *altMid
& 0xff000000;
177 if (keyFlags
& tableMask
) {
183 // No key entry for this char & table.
184 // The input char maps to itself.
191 int32_t stringLen
= USPOOF_KEY_LENGTH_FIELD(keyFlags
) + 1;
192 int32_t keyTableIndex
= (int32_t)(mid
- fSpoofData
->fCFUKeys
);
194 // Value is either a UChar (for strings of length 1) or
195 // an index into the string table (for longer strings)
196 uint16_t value
= fSpoofData
->fCFUValues
[keyTableIndex
];
197 if (stringLen
== 1) {
198 dest
.append((UChar
)value
);
202 // String length of 4 from the above lookup is used for all strings of length >= 4.
203 // For these, get the real length from the string lengths table,
204 // which maps string table indexes to lengths.
205 // All strings of the same length are stored contiguously in the string table.
206 // 'value' from the lookup above is the starting index for the desired string.
209 if (stringLen
== 4) {
210 int32_t stringLengthsLimit
= fSpoofData
->fRawData
->fCFUStringLengthsSize
;
211 for (ix
= 0; ix
< stringLengthsLimit
; ix
++) {
212 if (fSpoofData
->fCFUStringLengths
[ix
].fLastString
>= value
) {
213 stringLen
= fSpoofData
->fCFUStringLengths
[ix
].fStrLength
;
217 U_ASSERT(ix
< stringLengthsLimit
);
220 U_ASSERT(value
+ stringLen
<= fSpoofData
->fRawData
->fCFUStringTableLen
);
221 UChar
*src
= &fSpoofData
->fCFUStrings
[value
];
222 dest
.append(src
, stringLen
);
227 //---------------------------------------------------------------------------------------
229 // wholeScriptCheck()
231 // Input text is already normalized to NFD
232 // Return the set of scripts, each of which can represent something that is
233 // confusable with the input text. The script of the input text
234 // is included; input consisting of characters from a single script will
235 // always produce a result consisting of a set containing that script.
237 //---------------------------------------------------------------------------------------
238 void SpoofImpl::wholeScriptCheck(
239 const UnicodeString
&text
, ScriptSet
*result
, UErrorCode
&status
) const {
242 (fChecks
& USPOOF_ANY_CASE
) ? fSpoofData
->fAnyCaseTrie
: fSpoofData
->fLowerCaseTrie
;
244 int32_t length
= text
.length();
245 for (int32_t inputIdx
=0; inputIdx
< length
;) {
246 UChar32 c
= text
.char32At(inputIdx
);
247 inputIdx
+= U16_LENGTH(c
);
248 uint32_t index
= utrie2_get32(table
, c
);
250 // No confusables in another script for this char.
251 // TODO: we should change the data to have sets with just the single script
252 // bit for the script of this char. Gets rid of this special case.
253 // Until then, grab the script from the char and intersect it with the set.
254 UScriptCode cpScript
= uscript_getScript(c
, &status
);
255 U_ASSERT(cpScript
> USCRIPT_INHERITED
);
256 result
->intersect(cpScript
, status
);
257 } else if (index
== 1) {
258 // Script == Common or Inherited. Nothing to do.
260 result
->intersect(fSpoofData
->fScriptSets
[index
]);
266 void SpoofImpl::setAllowedLocales(const char *localesList
, UErrorCode
&status
) {
267 UnicodeSet allowedChars
;
268 UnicodeSet
*tmpSet
= NULL
;
269 const char *locStart
= localesList
;
270 const char *locEnd
= NULL
;
271 const char *localesListEnd
= localesList
+ uprv_strlen(localesList
);
272 int32_t localeListCount
= 0; // Number of locales provided by caller.
274 // Loop runs once per locale from the localesList, a comma separated list of locales.
276 locEnd
= uprv_strchr(locStart
, ',');
277 if (locEnd
== NULL
) {
278 locEnd
= localesListEnd
;
280 while (*locStart
== ' ') {
283 const char *trimmedEnd
= locEnd
-1;
284 while (trimmedEnd
> locStart
&& *trimmedEnd
== ' ') {
287 if (trimmedEnd
<= locStart
) {
290 const char *locale
= uprv_strndup(locStart
, (int32_t)(trimmedEnd
+ 1 - locStart
));
293 // We have one locale from the locales list.
294 // Add the script chars for this locale to the accumulating set of allowed chars.
295 // If the locale is no good, we will be notified back via status.
296 addScriptChars(locale
, &allowedChars
, status
);
297 uprv_free((void *)locale
);
298 if (U_FAILURE(status
)) {
301 locStart
= locEnd
+ 1;
302 } while (locStart
< localesListEnd
);
304 // If our caller provided an empty list of locales, we disable the allowed characters checking
305 if (localeListCount
== 0) {
306 uprv_free((void *)fAllowedLocales
);
307 fAllowedLocales
= uprv_strdup("");
308 tmpSet
= new UnicodeSet(0, 0x10ffff);
309 if (fAllowedLocales
== NULL
|| tmpSet
== NULL
) {
310 status
= U_MEMORY_ALLOCATION_ERROR
;
314 delete fAllowedCharsSet
;
315 fAllowedCharsSet
= tmpSet
;
316 fChecks
&= ~USPOOF_CHAR_LIMIT
;
321 // Add all common and inherited characters to the set of allowed chars.
323 tempSet
.applyIntPropertyValue(UCHAR_SCRIPT
, USCRIPT_COMMON
, status
);
324 allowedChars
.addAll(tempSet
);
325 tempSet
.applyIntPropertyValue(UCHAR_SCRIPT
, USCRIPT_INHERITED
, status
);
326 allowedChars
.addAll(tempSet
);
328 // If anything went wrong, we bail out without changing
329 // the state of the spoof checker.
330 if (U_FAILURE(status
)) {
334 // Store the updated spoof checker state.
335 tmpSet
= static_cast<UnicodeSet
*>(allowedChars
.clone());
336 const char *tmpLocalesList
= uprv_strdup(localesList
);
337 if (tmpSet
== NULL
|| tmpLocalesList
== NULL
) {
338 status
= U_MEMORY_ALLOCATION_ERROR
;
341 uprv_free((void *)fAllowedLocales
);
342 fAllowedLocales
= tmpLocalesList
;
344 delete fAllowedCharsSet
;
345 fAllowedCharsSet
= tmpSet
;
346 fChecks
|= USPOOF_CHAR_LIMIT
;
350 const char * SpoofImpl::getAllowedLocales(UErrorCode
&/*status*/) {
351 return fAllowedLocales
;
355 // Given a locale (a language), add all the characters from all of the scripts used with that language
356 // to the allowedChars UnicodeSet
358 void SpoofImpl::addScriptChars(const char *locale
, UnicodeSet
*allowedChars
, UErrorCode
&status
) {
359 UScriptCode scripts
[30];
361 int32_t numScripts
= uscript_getCode(locale
, scripts
, sizeof(scripts
)/sizeof(UScriptCode
), &status
);
362 if (U_FAILURE(status
)) {
365 if (status
== U_USING_DEFAULT_WARNING
) {
366 status
= U_ILLEGAL_ARGUMENT_ERROR
;
371 for (i
=0; i
<numScripts
; i
++) {
372 tmpSet
.applyIntPropertyValue(UCHAR_SCRIPT
, scripts
[i
], status
);
373 allowedChars
->addAll(tmpSet
);
378 // Convert a text format hex number. Utility function used by builder code. Static.
379 // Input: UChar *string text. Output: a UChar32
380 // Input has been pre-checked, and will have no non-hex chars.
381 // The number must fall in the code point range of 0..0x10ffff
383 UChar32
SpoofImpl::ScanHex(const UChar
*s
, int32_t start
, int32_t limit
, UErrorCode
&status
) {
384 if (U_FAILURE(status
)) {
387 U_ASSERT(limit
-start
> 0);
390 for (i
=start
; i
<limit
; i
++) {
391 int digitVal
= s
[i
] - 0x30;
393 digitVal
= 0xa + (s
[i
] - 0x41); // Upper Case 'A'
396 digitVal
= 0xa + (s
[i
] - 0x61); // Lower Case 'a'
398 U_ASSERT(digitVal
<= 0xf);
402 if (val
> 0x10ffff) {
403 status
= U_PARSE_ERROR
;
409 // IdentifierInfo Cache. IdentifierInfo objects are somewhat expensive to create.
410 // Maintain a one-element cache, which is sufficient to avoid repeatedly
411 // creating new ones unless we get multi-thread concurrency in spoof
412 // check operations, which should be statistically uncommon.
414 // These functions are used in place of new & delete of an IdentifierInfo.
415 // They will recycle the IdentifierInfo when possible.
416 // They are logically const, and used within const functions that must be thread safe.
417 IdentifierInfo
*SpoofImpl::getIdentifierInfo(UErrorCode
&status
) const {
418 IdentifierInfo
*returnIdInfo
= NULL
;
419 if (U_FAILURE(status
)) {
422 SpoofImpl
*nonConstThis
= const_cast<SpoofImpl
*>(this);
425 returnIdInfo
= nonConstThis
->fCachedIdentifierInfo
;
426 nonConstThis
->fCachedIdentifierInfo
= NULL
;
428 if (returnIdInfo
== NULL
) {
429 returnIdInfo
= new IdentifierInfo(status
);
430 if (U_SUCCESS(status
) && returnIdInfo
== NULL
) {
431 status
= U_MEMORY_ALLOCATION_ERROR
;
433 if (U_FAILURE(status
) && returnIdInfo
!= NULL
) {
442 void SpoofImpl::releaseIdentifierInfo(IdentifierInfo
*idInfo
) const {
443 if (idInfo
!= NULL
) {
444 SpoofImpl
*nonConstThis
= const_cast<SpoofImpl
*>(this);
447 if (nonConstThis
->fCachedIdentifierInfo
== NULL
) {
448 nonConstThis
->fCachedIdentifierInfo
= idInfo
;
459 //----------------------------------------------------------------------------------------------
461 // class SpoofData Implementation
463 //----------------------------------------------------------------------------------------------
466 UBool
SpoofData::validateDataVersion(const SpoofDataHeader
*rawData
, UErrorCode
&status
) {
467 if (U_FAILURE(status
) ||
469 rawData
->fMagic
!= USPOOF_MAGIC
||
470 rawData
->fFormatVersion
[0] > 1 ||
471 rawData
->fFormatVersion
[1] > 0) {
472 status
= U_INVALID_FORMAT_ERROR
;
479 // SpoofData::getDefault() - return a wrapper around the spoof data that is
480 // baked into the default ICU data.
482 SpoofData
*SpoofData::getDefault(UErrorCode
&status
) {
483 // TODO: Cache it. Lazy create, keep until cleanup.
485 UDataMemory
*udm
= udata_open(NULL
, "cfu", "confusables", &status
);
486 if (U_FAILURE(status
)) {
489 SpoofData
*This
= new SpoofData(udm
, status
);
490 if (U_FAILURE(status
)) {
495 status
= U_MEMORY_ALLOCATION_ERROR
;
501 SpoofData::SpoofData(UDataMemory
*udm
, UErrorCode
&status
)
504 if (U_FAILURE(status
)) {
507 fRawData
= reinterpret_cast<SpoofDataHeader
*>
508 ((char *)(udm
->pHeader
) + udm
->pHeader
->dataHeader
.headerSize
);
510 validateDataVersion(fRawData
, status
);
515 SpoofData::SpoofData(const void *data
, int32_t length
, UErrorCode
&status
)
518 if (U_FAILURE(status
)) {
521 if ((size_t)length
< sizeof(SpoofDataHeader
)) {
522 status
= U_INVALID_FORMAT_ERROR
;
525 void *ncData
= const_cast<void *>(data
);
526 fRawData
= static_cast<SpoofDataHeader
*>(ncData
);
527 if (length
< fRawData
->fLength
) {
528 status
= U_INVALID_FORMAT_ERROR
;
531 validateDataVersion(fRawData
, status
);
536 // Spoof Data constructor for use from data builder.
537 // Initializes a new, empty data area that will be populated later.
538 SpoofData::SpoofData(UErrorCode
&status
) {
540 if (U_FAILURE(status
)) {
546 // The spoof header should already be sized to be a multiple of 16 bytes.
547 // Just in case it's not, round it up.
548 uint32_t initialSize
= (sizeof(SpoofDataHeader
) + 15) & ~15;
549 U_ASSERT(initialSize
== sizeof(SpoofDataHeader
));
551 fRawData
= static_cast<SpoofDataHeader
*>(uprv_malloc(initialSize
));
552 fMemLimit
= initialSize
;
553 if (fRawData
== NULL
) {
554 status
= U_MEMORY_ALLOCATION_ERROR
;
557 uprv_memset(fRawData
, 0, initialSize
);
559 fRawData
->fMagic
= USPOOF_MAGIC
;
560 fRawData
->fFormatVersion
[0] = 1;
561 fRawData
->fFormatVersion
[1] = 0;
562 fRawData
->fFormatVersion
[2] = 0;
563 fRawData
->fFormatVersion
[3] = 0;
567 // reset() - initialize all fields.
568 // Should be updated if any new fields are added.
569 // Called by constructors to put things in a known initial state.
570 void SpoofData::reset() {
578 fCFUStringLengths
= NULL
;
581 fLowerCaseTrie
= NULL
;
586 // SpoofData::initPtrs()
587 // Initialize the pointers to the various sections of the raw data.
589 // This function is used both during the Trie building process (multiple
590 // times, as the individual data sections are added), and
591 // during the opening of a Spoof Checker from prebuilt data.
593 // The pointers for non-existent data sections (identified by an offset of 0)
596 // Note: During building the data, adding each new data section
597 // reallocs the raw data area, which likely relocates it, which
598 // in turn requires reinitializing all of the pointers into it, hence
599 // multiple calls to this function during building.
601 void SpoofData::initPtrs(UErrorCode
&status
) {
604 fCFUStringLengths
= NULL
;
606 if (U_FAILURE(status
)) {
609 if (fRawData
->fCFUKeys
!= 0) {
610 fCFUKeys
= (int32_t *)((char *)fRawData
+ fRawData
->fCFUKeys
);
612 if (fRawData
->fCFUStringIndex
!= 0) {
613 fCFUValues
= (uint16_t *)((char *)fRawData
+ fRawData
->fCFUStringIndex
);
615 if (fRawData
->fCFUStringLengths
!= 0) {
616 fCFUStringLengths
= (SpoofStringLengthsElement
*)((char *)fRawData
+ fRawData
->fCFUStringLengths
);
618 if (fRawData
->fCFUStringTable
!= 0) {
619 fCFUStrings
= (UChar
*)((char *)fRawData
+ fRawData
->fCFUStringTable
);
622 if (fAnyCaseTrie
== NULL
&& fRawData
->fAnyCaseTrie
!= 0) {
623 fAnyCaseTrie
= utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS
,
624 (char *)fRawData
+ fRawData
->fAnyCaseTrie
, fRawData
->fAnyCaseTrieLength
, NULL
, &status
);
626 if (fLowerCaseTrie
== NULL
&& fRawData
->fLowerCaseTrie
!= 0) {
627 fLowerCaseTrie
= utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS
,
628 (char *)fRawData
+ fRawData
->fLowerCaseTrie
, fRawData
->fLowerCaseTrieLength
, NULL
, &status
);
631 if (fRawData
->fScriptSets
!= 0) {
632 fScriptSets
= (ScriptSet
*)((char *)fRawData
+ fRawData
->fScriptSets
);
637 SpoofData::~SpoofData() {
638 utrie2_close(fAnyCaseTrie
);
640 utrie2_close(fLowerCaseTrie
);
641 fLowerCaseTrie
= NULL
;
653 void SpoofData::removeReference() {
654 if (umtx_atomic_dec(&fRefCount
) == 0) {
660 SpoofData
*SpoofData::addReference() {
661 umtx_atomic_inc(&fRefCount
);
666 void *SpoofData::reserveSpace(int32_t numBytes
, UErrorCode
&status
) {
667 if (U_FAILURE(status
)) {
672 status
= U_INTERNAL_PROGRAM_ERROR
;
676 numBytes
= (numBytes
+ 15) & ~15; // Round up to a multiple of 16
677 uint32_t returnOffset
= fMemLimit
;
678 fMemLimit
+= numBytes
;
679 fRawData
= static_cast<SpoofDataHeader
*>(uprv_realloc(fRawData
, fMemLimit
));
680 fRawData
->fLength
= fMemLimit
;
681 uprv_memset((char *)fRawData
+ returnOffset
, 0, numBytes
);
683 return (char *)fRawData
+ returnOffset
;
691 //-----------------------------------------------------------------------------
693 // uspoof_swap - byte swap and char encoding swap of spoof data
695 //-----------------------------------------------------------------------------
696 U_CAPI
int32_t U_EXPORT2
697 uspoof_swap(const UDataSwapper
*ds
, const void *inData
, int32_t length
, void *outData
,
698 UErrorCode
*status
) {
700 if (status
== NULL
|| U_FAILURE(*status
)) {
703 if(ds
==NULL
|| inData
==NULL
|| length
<-1 || (length
>0 && outData
==NULL
)) {
704 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
709 // Check that the data header is for spoof data.
710 // (Header contents are defined in gencfu.cpp)
712 const UDataInfo
*pInfo
= (const UDataInfo
*)((const char *)inData
+4);
713 if(!( pInfo
->dataFormat
[0]==0x43 && /* dataFormat="Cfu " */
714 pInfo
->dataFormat
[1]==0x66 &&
715 pInfo
->dataFormat
[2]==0x75 &&
716 pInfo
->dataFormat
[3]==0x20 &&
717 pInfo
->formatVersion
[0]==1 )) {
718 udata_printError(ds
, "uspoof_swap(): data format %02x.%02x.%02x.%02x "
719 "(format version %02x %02x %02x %02x) is not recognized\n",
720 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
721 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
722 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1],
723 pInfo
->formatVersion
[2], pInfo
->formatVersion
[3]);
724 *status
=U_UNSUPPORTED_ERROR
;
729 // Swap the data header. (This is the generic ICU Data Header, not the uspoof Specific
730 // header). This swap also conveniently gets us
731 // the size of the ICU d.h., which lets us locate the start
732 // of the uspoof specific data.
734 int32_t headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, status
);
738 // Get the Spoof Data Header, and check that it appears to be OK.
741 const uint8_t *inBytes
=(const uint8_t *)inData
+headerSize
;
742 SpoofDataHeader
*spoofDH
= (SpoofDataHeader
*)inBytes
;
743 if (ds
->readUInt32(spoofDH
->fMagic
) != USPOOF_MAGIC
||
744 ds
->readUInt32(spoofDH
->fLength
) < sizeof(SpoofDataHeader
))
746 udata_printError(ds
, "uspoof_swap(): Spoof Data header is invalid.\n");
747 *status
=U_UNSUPPORTED_ERROR
;
752 // Preflight operation? Just return the size
754 int32_t spoofDataLength
= ds
->readUInt32(spoofDH
->fLength
);
755 int32_t totalSize
= headerSize
+ spoofDataLength
;
761 // Check that length passed in is consistent with length from Spoof data header.
763 if (length
< totalSize
) {
764 udata_printError(ds
, "uspoof_swap(): too few bytes (%d after ICU Data header) for spoof data.\n",
766 *status
=U_INDEX_OUTOFBOUNDS_ERROR
;
772 // Swap the Data. Do the data itself first, then the Spoof Data Header, because
773 // we need to reference the header to locate the data, and an
774 // inplace swap of the header leaves it unusable.
776 uint8_t *outBytes
= (uint8_t *)outData
+ headerSize
;
777 SpoofDataHeader
*outputDH
= (SpoofDataHeader
*)outBytes
;
779 int32_t sectionStart
;
780 int32_t sectionLength
;
783 // If not swapping in place, zero out the output buffer before starting.
784 // Gaps may exist between the individual sections, and these must be zeroed in
785 // the output buffer. The simplest way to do that is to just zero the whole thing.
787 if (inBytes
!= outBytes
) {
788 uprv_memset(outBytes
, 0, spoofDataLength
);
791 // Confusables Keys Section (fCFUKeys)
792 sectionStart
= ds
->readUInt32(spoofDH
->fCFUKeys
);
793 sectionLength
= ds
->readUInt32(spoofDH
->fCFUKeysSize
) * 4;
794 ds
->swapArray32(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
796 // String Index Section
797 sectionStart
= ds
->readUInt32(spoofDH
->fCFUStringIndex
);
798 sectionLength
= ds
->readUInt32(spoofDH
->fCFUStringIndexSize
) * 2;
799 ds
->swapArray16(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
801 // String Table Section
802 sectionStart
= ds
->readUInt32(spoofDH
->fCFUStringTable
);
803 sectionLength
= ds
->readUInt32(spoofDH
->fCFUStringTableLen
) * 2;
804 ds
->swapArray16(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
806 // String Lengths Section
807 sectionStart
= ds
->readUInt32(spoofDH
->fCFUStringLengths
);
808 sectionLength
= ds
->readUInt32(spoofDH
->fCFUStringLengthsSize
) * 4;
809 ds
->swapArray16(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
812 sectionStart
= ds
->readUInt32(spoofDH
->fAnyCaseTrie
);
813 sectionLength
= ds
->readUInt32(spoofDH
->fAnyCaseTrieLength
);
814 utrie2_swap(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
817 sectionStart
= ds
->readUInt32(spoofDH
->fLowerCaseTrie
);
818 sectionLength
= ds
->readUInt32(spoofDH
->fLowerCaseTrieLength
);
819 utrie2_swap(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
821 // Script Sets. The data is an array of int32_t
822 sectionStart
= ds
->readUInt32(spoofDH
->fScriptSets
);
823 sectionLength
= ds
->readUInt32(spoofDH
->fScriptSetsLength
) * sizeof(ScriptSet
);
824 ds
->swapArray32(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
826 // And, last, swap the header itself.
827 // int32_t fMagic // swap this
828 // uint8_t fFormatVersion[4] // Do not swap this, just copy
829 // int32_t fLength and all the rest // Swap the rest, all is 32 bit stuff.
831 uint32_t magic
= ds
->readUInt32(spoofDH
->fMagic
);
832 ds
->writeUInt32((uint32_t *)&outputDH
->fMagic
, magic
);
834 if (outputDH
->fFormatVersion
!= spoofDH
->fFormatVersion
) {
835 uprv_memcpy(outputDH
->fFormatVersion
, spoofDH
->fFormatVersion
, sizeof(spoofDH
->fFormatVersion
));
837 // swap starting at fLength
838 ds
->swapArray32(ds
, &spoofDH
->fLength
, sizeof(SpoofDataHeader
)-8 /* minus magic and fFormatVersion[4] */, &outputDH
->fLength
, status
);