2 **********************************************************************
3 * Copyright (C) 2008-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
8 #include "unicode/utypes.h"
9 #include "unicode/uspoof.h"
10 #include "unicode/uchar.h"
11 #include "unicode/uniset.h"
12 #include "unicode/utf16.h"
16 #include "identifier_info.h"
17 #include "scriptset.h"
21 #include "uspoof_impl.h"
23 #if !UCONFIG_NO_NORMALIZATION
28 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SpoofImpl
)
30 SpoofImpl::SpoofImpl(SpoofData
*data
, UErrorCode
&status
) :
31 fMagic(0), fChecks(USPOOF_ALL_CHECKS
), fSpoofData(data
), fAllowedCharsSet(NULL
) ,
32 fAllowedLocales(NULL
), fCachedIdentifierInfo(NULL
) {
33 if (U_FAILURE(status
)) {
36 fRestrictionLevel
= USPOOF_HIGHLY_RESTRICTIVE
;
38 UnicodeSet
*allowedCharsSet
= new UnicodeSet(0, 0x10ffff);
39 allowedCharsSet
->freeze();
40 fAllowedCharsSet
= allowedCharsSet
;
41 fAllowedLocales
= uprv_strdup("");
42 if (fAllowedCharsSet
== NULL
|| fAllowedLocales
== NULL
) {
43 status
= U_MEMORY_ALLOCATION_ERROR
;
46 fMagic
= USPOOF_MAGIC
;
50 SpoofImpl::SpoofImpl() :
51 fMagic(USPOOF_MAGIC
), fChecks(USPOOF_ALL_CHECKS
), fSpoofData(NULL
), fAllowedCharsSet(NULL
) ,
52 fAllowedLocales(NULL
), fCachedIdentifierInfo(NULL
) {
53 UnicodeSet
*allowedCharsSet
= new UnicodeSet(0, 0x10ffff);
54 allowedCharsSet
->freeze();
55 fAllowedCharsSet
= allowedCharsSet
;
56 fAllowedLocales
= uprv_strdup("");
57 fRestrictionLevel
= USPOOF_HIGHLY_RESTRICTIVE
;
61 // Copy Constructor, used by the user level clone() function.
62 SpoofImpl::SpoofImpl(const SpoofImpl
&src
, UErrorCode
&status
) :
63 fMagic(0), fChecks(USPOOF_ALL_CHECKS
), fSpoofData(NULL
), fAllowedCharsSet(NULL
) ,
64 fAllowedLocales(NULL
), fCachedIdentifierInfo(NULL
) {
65 if (U_FAILURE(status
)) {
69 fChecks
= src
.fChecks
;
70 if (src
.fSpoofData
!= NULL
) {
71 fSpoofData
= src
.fSpoofData
->addReference();
73 fAllowedCharsSet
= static_cast<const UnicodeSet
*>(src
.fAllowedCharsSet
->clone());
74 if (fAllowedCharsSet
== NULL
) {
75 status
= U_MEMORY_ALLOCATION_ERROR
;
77 fAllowedLocales
= uprv_strdup(src
.fAllowedLocales
);
78 fRestrictionLevel
= src
.fRestrictionLevel
;
81 SpoofImpl::~SpoofImpl() {
82 fMagic
= 0; // head off application errors by preventing use of
83 // of deleted objects.
84 if (fSpoofData
!= NULL
) {
85 fSpoofData
->removeReference(); // Will delete if refCount goes to zero.
87 delete fAllowedCharsSet
;
88 uprv_free((void *)fAllowedLocales
);
89 delete fCachedIdentifierInfo
;
93 // Incoming parameter check on Status and the SpoofChecker object
94 // received from the C API.
96 const SpoofImpl
*SpoofImpl::validateThis(const USpoofChecker
*sc
, UErrorCode
&status
) {
97 if (U_FAILURE(status
)) {
101 status
= U_ILLEGAL_ARGUMENT_ERROR
;
104 SpoofImpl
*This
= (SpoofImpl
*)sc
;
105 if (This
->fMagic
!= USPOOF_MAGIC
||
106 This
->fSpoofData
== NULL
) {
107 status
= U_INVALID_FORMAT_ERROR
;
110 if (!SpoofData::validateDataVersion(This
->fSpoofData
->fRawData
, status
)) {
116 SpoofImpl
*SpoofImpl::validateThis(USpoofChecker
*sc
, UErrorCode
&status
) {
117 return const_cast<SpoofImpl
*>
118 (SpoofImpl::validateThis(const_cast<const USpoofChecker
*>(sc
), status
));
123 //--------------------------------------------------------------------------------------
125 // confusableLookup() This is the heart of the confusable skeleton generation
128 // Given a source character, produce the corresponding
129 // replacement character(s), appending them to the dest string.
131 //---------------------------------------------------------------------------------------
132 int32_t SpoofImpl::confusableLookup(UChar32 inChar
, int32_t tableMask
, UnicodeString
&dest
) const {
134 // Binary search the spoof data key table for the inChar
135 int32_t *low
= fSpoofData
->fCFUKeys
;
137 int32_t *limit
= low
+ fSpoofData
->fRawData
->fCFUKeysSize
;
140 int32_t delta
= ((int32_t)(limit
-low
))/2;
142 midc
= *mid
& 0x1fffff;
143 if (inChar
== midc
) {
145 } else if (inChar
< midc
) {
150 } while (low
< limit
-1);
152 midc
= *mid
& 0x1fffff;
153 if (inChar
!= midc
) {
154 // Char not found. It maps to itself.
160 int32_t keyFlags
= *mid
& 0xff000000;
161 if ((keyFlags
& tableMask
) == 0) {
162 // We found the right key char, but the entry doesn't pertain to the
163 // table we need. See if there is an adjacent key that does
164 if (keyFlags
& USPOOF_KEY_MULTIPLE_VALUES
) {
166 for (altMid
= mid
-1; (*altMid
&0x00ffffff) == inChar
; altMid
--) {
167 keyFlags
= *altMid
& 0xff000000;
168 if (keyFlags
& tableMask
) {
173 for (altMid
= mid
+1; (*altMid
&0x00ffffff) == inChar
; altMid
++) {
174 keyFlags
= *altMid
& 0xff000000;
175 if (keyFlags
& tableMask
) {
181 // No key entry for this char & table.
182 // The input char maps to itself.
189 int32_t stringLen
= USPOOF_KEY_LENGTH_FIELD(keyFlags
) + 1;
190 int32_t keyTableIndex
= (int32_t)(mid
- fSpoofData
->fCFUKeys
);
192 // Value is either a UChar (for strings of length 1) or
193 // an index into the string table (for longer strings)
194 uint16_t value
= fSpoofData
->fCFUValues
[keyTableIndex
];
195 if (stringLen
== 1) {
196 dest
.append((UChar
)value
);
200 // String length of 4 from the above lookup is used for all strings of length >= 4.
201 // For these, get the real length from the string lengths table,
202 // which maps string table indexes to lengths.
203 // All strings of the same length are stored contiguously in the string table.
204 // 'value' from the lookup above is the starting index for the desired string.
207 if (stringLen
== 4) {
208 int32_t stringLengthsLimit
= fSpoofData
->fRawData
->fCFUStringLengthsSize
;
209 for (ix
= 0; ix
< stringLengthsLimit
; ix
++) {
210 if (fSpoofData
->fCFUStringLengths
[ix
].fLastString
>= value
) {
211 stringLen
= fSpoofData
->fCFUStringLengths
[ix
].fStrLength
;
215 U_ASSERT(ix
< stringLengthsLimit
);
218 U_ASSERT(value
+ stringLen
<= fSpoofData
->fRawData
->fCFUStringTableLen
);
219 UChar
*src
= &fSpoofData
->fCFUStrings
[value
];
220 dest
.append(src
, stringLen
);
225 //---------------------------------------------------------------------------------------
227 // wholeScriptCheck()
229 // Input text is already normalized to NFD
230 // Return the set of scripts, each of which can represent something that is
231 // confusable with the input text. The script of the input text
232 // is included; input consisting of characters from a single script will
233 // always produce a result consisting of a set containing that script.
235 //---------------------------------------------------------------------------------------
236 void SpoofImpl::wholeScriptCheck(
237 const UnicodeString
&text
, ScriptSet
*result
, UErrorCode
&status
) const {
240 (fChecks
& USPOOF_ANY_CASE
) ? fSpoofData
->fAnyCaseTrie
: fSpoofData
->fLowerCaseTrie
;
242 int32_t length
= text
.length();
243 for (int32_t inputIdx
=0; inputIdx
< length
;) {
244 UChar32 c
= text
.char32At(inputIdx
);
245 inputIdx
+= U16_LENGTH(c
);
246 uint32_t index
= utrie2_get32(table
, c
);
248 // No confusables in another script for this char.
249 // TODO: we should change the data to have sets with just the single script
250 // bit for the script of this char. Gets rid of this special case.
251 // Until then, grab the script from the char and intersect it with the set.
252 UScriptCode cpScript
= uscript_getScript(c
, &status
);
253 U_ASSERT(cpScript
> USCRIPT_INHERITED
);
254 result
->intersect(cpScript
, status
);
255 } else if (index
== 1) {
256 // Script == Common or Inherited. Nothing to do.
258 result
->intersect(fSpoofData
->fScriptSets
[index
]);
264 void SpoofImpl::setAllowedLocales(const char *localesList
, UErrorCode
&status
) {
265 UnicodeSet allowedChars
;
266 UnicodeSet
*tmpSet
= NULL
;
267 const char *locStart
= localesList
;
268 const char *locEnd
= NULL
;
269 const char *localesListEnd
= localesList
+ uprv_strlen(localesList
);
270 int32_t localeListCount
= 0; // Number of locales provided by caller.
272 // Loop runs once per locale from the localesList, a comma separated list of locales.
274 locEnd
= uprv_strchr(locStart
, ',');
275 if (locEnd
== NULL
) {
276 locEnd
= localesListEnd
;
278 while (*locStart
== ' ') {
281 const char *trimmedEnd
= locEnd
-1;
282 while (trimmedEnd
> locStart
&& *trimmedEnd
== ' ') {
285 if (trimmedEnd
<= locStart
) {
288 const char *locale
= uprv_strndup(locStart
, (int32_t)(trimmedEnd
+ 1 - locStart
));
291 // We have one locale from the locales list.
292 // Add the script chars for this locale to the accumulating set of allowed chars.
293 // If the locale is no good, we will be notified back via status.
294 addScriptChars(locale
, &allowedChars
, status
);
295 uprv_free((void *)locale
);
296 if (U_FAILURE(status
)) {
299 locStart
= locEnd
+ 1;
300 } while (locStart
< localesListEnd
);
302 // If our caller provided an empty list of locales, we disable the allowed characters checking
303 if (localeListCount
== 0) {
304 uprv_free((void *)fAllowedLocales
);
305 fAllowedLocales
= uprv_strdup("");
306 tmpSet
= new UnicodeSet(0, 0x10ffff);
307 if (fAllowedLocales
== NULL
|| tmpSet
== NULL
) {
308 status
= U_MEMORY_ALLOCATION_ERROR
;
312 delete fAllowedCharsSet
;
313 fAllowedCharsSet
= tmpSet
;
314 fChecks
&= ~USPOOF_CHAR_LIMIT
;
319 // Add all common and inherited characters to the set of allowed chars.
321 tempSet
.applyIntPropertyValue(UCHAR_SCRIPT
, USCRIPT_COMMON
, status
);
322 allowedChars
.addAll(tempSet
);
323 tempSet
.applyIntPropertyValue(UCHAR_SCRIPT
, USCRIPT_INHERITED
, status
);
324 allowedChars
.addAll(tempSet
);
326 // If anything went wrong, we bail out without changing
327 // the state of the spoof checker.
328 if (U_FAILURE(status
)) {
332 // Store the updated spoof checker state.
333 tmpSet
= static_cast<UnicodeSet
*>(allowedChars
.clone());
334 const char *tmpLocalesList
= uprv_strdup(localesList
);
335 if (tmpSet
== NULL
|| tmpLocalesList
== NULL
) {
336 status
= U_MEMORY_ALLOCATION_ERROR
;
339 uprv_free((void *)fAllowedLocales
);
340 fAllowedLocales
= tmpLocalesList
;
342 delete fAllowedCharsSet
;
343 fAllowedCharsSet
= tmpSet
;
344 fChecks
|= USPOOF_CHAR_LIMIT
;
348 const char * SpoofImpl::getAllowedLocales(UErrorCode
&/*status*/) {
349 return fAllowedLocales
;
353 // Given a locale (a language), add all the characters from all of the scripts used with that language
354 // to the allowedChars UnicodeSet
356 void SpoofImpl::addScriptChars(const char *locale
, UnicodeSet
*allowedChars
, UErrorCode
&status
) {
357 UScriptCode scripts
[30];
359 int32_t numScripts
= uscript_getCode(locale
, scripts
, UPRV_LENGTHOF(scripts
), &status
);
360 if (U_FAILURE(status
)) {
363 if (status
== U_USING_DEFAULT_WARNING
) {
364 status
= U_ILLEGAL_ARGUMENT_ERROR
;
369 for (i
=0; i
<numScripts
; i
++) {
370 tmpSet
.applyIntPropertyValue(UCHAR_SCRIPT
, scripts
[i
], status
);
371 allowedChars
->addAll(tmpSet
);
376 // Convert a text format hex number. Utility function used by builder code. Static.
377 // Input: UChar *string text. Output: a UChar32
378 // Input has been pre-checked, and will have no non-hex chars.
379 // The number must fall in the code point range of 0..0x10ffff
381 UChar32
SpoofImpl::ScanHex(const UChar
*s
, int32_t start
, int32_t limit
, UErrorCode
&status
) {
382 if (U_FAILURE(status
)) {
385 U_ASSERT(limit
-start
> 0);
388 for (i
=start
; i
<limit
; i
++) {
389 int digitVal
= s
[i
] - 0x30;
391 digitVal
= 0xa + (s
[i
] - 0x41); // Upper Case 'A'
394 digitVal
= 0xa + (s
[i
] - 0x61); // Lower Case 'a'
396 U_ASSERT(digitVal
<= 0xf);
400 if (val
> 0x10ffff) {
401 status
= U_PARSE_ERROR
;
407 // IdentifierInfo Cache. IdentifierInfo objects are somewhat expensive to create.
408 // Maintain a one-element cache, which is sufficient to avoid repeatedly
409 // creating new ones unless we get multi-thread concurrency in spoof
410 // check operations, which should be statistically uncommon.
412 // These functions are used in place of new & delete of an IdentifierInfo.
413 // They will recycle the IdentifierInfo when possible.
414 // They are logically const, and used within const functions that must be thread safe.
415 IdentifierInfo
*SpoofImpl::getIdentifierInfo(UErrorCode
&status
) const {
416 IdentifierInfo
*returnIdInfo
= NULL
;
417 if (U_FAILURE(status
)) {
420 SpoofImpl
*nonConstThis
= const_cast<SpoofImpl
*>(this);
423 returnIdInfo
= nonConstThis
->fCachedIdentifierInfo
;
424 nonConstThis
->fCachedIdentifierInfo
= NULL
;
426 if (returnIdInfo
== NULL
) {
427 returnIdInfo
= new IdentifierInfo(status
);
428 if (U_SUCCESS(status
) && returnIdInfo
== NULL
) {
429 status
= U_MEMORY_ALLOCATION_ERROR
;
431 if (U_FAILURE(status
) && returnIdInfo
!= NULL
) {
440 void SpoofImpl::releaseIdentifierInfo(IdentifierInfo
*idInfo
) const {
441 if (idInfo
!= NULL
) {
442 SpoofImpl
*nonConstThis
= const_cast<SpoofImpl
*>(this);
445 if (nonConstThis
->fCachedIdentifierInfo
== NULL
) {
446 nonConstThis
->fCachedIdentifierInfo
= idInfo
;
457 //----------------------------------------------------------------------------------------------
459 // class SpoofData Implementation
461 //----------------------------------------------------------------------------------------------
464 UBool
SpoofData::validateDataVersion(const SpoofDataHeader
*rawData
, UErrorCode
&status
) {
465 if (U_FAILURE(status
) ||
467 rawData
->fMagic
!= USPOOF_MAGIC
||
468 rawData
->fFormatVersion
[0] > 1 ||
469 rawData
->fFormatVersion
[1] > 0) {
470 status
= U_INVALID_FORMAT_ERROR
;
476 static UBool U_CALLCONV
477 spoofDataIsAcceptable(void *context
,
478 const char * /* type */, const char * /*name*/,
479 const UDataInfo
*pInfo
) {
482 pInfo
->isBigEndian
== U_IS_BIG_ENDIAN
&&
483 pInfo
->charsetFamily
== U_CHARSET_FAMILY
&&
484 pInfo
->dataFormat
[0] == 0x43 && // dataFormat="Cfu "
485 pInfo
->dataFormat
[1] == 0x66 &&
486 pInfo
->dataFormat
[2] == 0x75 &&
487 pInfo
->dataFormat
[3] == 0x20 &&
488 pInfo
->formatVersion
[0] == 1
490 UVersionInfo
*version
= static_cast<UVersionInfo
*>(context
);
491 if(version
!= NULL
) {
492 uprv_memcpy(version
, pInfo
->dataVersion
, 4);
501 // SpoofData::getDefault() - return a wrapper around the spoof data that is
502 // baked into the default ICU data.
504 // Called once, from the initOnce() function in uspoof_impl.cpp; the resulting
505 // SpoofData is shared by all spoof checkers using the default data.
507 SpoofData
*SpoofData::getDefault(UErrorCode
&status
) {
508 UDataMemory
*udm
= udata_openChoice(NULL
, "cfu", "confusables",
509 spoofDataIsAcceptable
,
510 NULL
, // context, would receive dataVersion if supplied.
512 if (U_FAILURE(status
)) {
515 SpoofData
*This
= new SpoofData(udm
, status
);
516 if (U_FAILURE(status
)) {
521 status
= U_MEMORY_ALLOCATION_ERROR
;
526 SpoofData::SpoofData(UDataMemory
*udm
, UErrorCode
&status
)
529 if (U_FAILURE(status
)) {
533 // fRawData is non-const because it may be constructed by the data builder.
534 fRawData
= reinterpret_cast<SpoofDataHeader
*>(
535 const_cast<void *>(udata_getMemory(udm
)));
536 validateDataVersion(fRawData
, status
);
541 SpoofData::SpoofData(const void *data
, int32_t length
, UErrorCode
&status
)
544 if (U_FAILURE(status
)) {
547 if ((size_t)length
< sizeof(SpoofDataHeader
)) {
548 status
= U_INVALID_FORMAT_ERROR
;
551 void *ncData
= const_cast<void *>(data
);
552 fRawData
= static_cast<SpoofDataHeader
*>(ncData
);
553 if (length
< fRawData
->fLength
) {
554 status
= U_INVALID_FORMAT_ERROR
;
557 validateDataVersion(fRawData
, status
);
562 // Spoof Data constructor for use from data builder.
563 // Initializes a new, empty data area that will be populated later.
564 SpoofData::SpoofData(UErrorCode
&status
) {
566 if (U_FAILURE(status
)) {
571 // The spoof header should already be sized to be a multiple of 16 bytes.
572 // Just in case it's not, round it up.
573 uint32_t initialSize
= (sizeof(SpoofDataHeader
) + 15) & ~15;
574 U_ASSERT(initialSize
== sizeof(SpoofDataHeader
));
576 fRawData
= static_cast<SpoofDataHeader
*>(uprv_malloc(initialSize
));
577 fMemLimit
= initialSize
;
578 if (fRawData
== NULL
) {
579 status
= U_MEMORY_ALLOCATION_ERROR
;
582 uprv_memset(fRawData
, 0, initialSize
);
584 fRawData
->fMagic
= USPOOF_MAGIC
;
585 fRawData
->fFormatVersion
[0] = 1;
586 fRawData
->fFormatVersion
[1] = 0;
587 fRawData
->fFormatVersion
[2] = 0;
588 fRawData
->fFormatVersion
[3] = 0;
592 // reset() - initialize all fields.
593 // Should be updated if any new fields are added.
594 // Called by constructors to put things in a known initial state.
595 void SpoofData::reset() {
603 fCFUStringLengths
= NULL
;
606 fLowerCaseTrie
= NULL
;
611 // SpoofData::initPtrs()
612 // Initialize the pointers to the various sections of the raw data.
614 // This function is used both during the Trie building process (multiple
615 // times, as the individual data sections are added), and
616 // during the opening of a Spoof Checker from prebuilt data.
618 // The pointers for non-existent data sections (identified by an offset of 0)
621 // Note: During building the data, adding each new data section
622 // reallocs the raw data area, which likely relocates it, which
623 // in turn requires reinitializing all of the pointers into it, hence
624 // multiple calls to this function during building.
626 void SpoofData::initPtrs(UErrorCode
&status
) {
629 fCFUStringLengths
= NULL
;
631 if (U_FAILURE(status
)) {
634 if (fRawData
->fCFUKeys
!= 0) {
635 fCFUKeys
= (int32_t *)((char *)fRawData
+ fRawData
->fCFUKeys
);
637 if (fRawData
->fCFUStringIndex
!= 0) {
638 fCFUValues
= (uint16_t *)((char *)fRawData
+ fRawData
->fCFUStringIndex
);
640 if (fRawData
->fCFUStringLengths
!= 0) {
641 fCFUStringLengths
= (SpoofStringLengthsElement
*)((char *)fRawData
+ fRawData
->fCFUStringLengths
);
643 if (fRawData
->fCFUStringTable
!= 0) {
644 fCFUStrings
= (UChar
*)((char *)fRawData
+ fRawData
->fCFUStringTable
);
647 if (fAnyCaseTrie
== NULL
&& fRawData
->fAnyCaseTrie
!= 0) {
648 fAnyCaseTrie
= utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS
,
649 (char *)fRawData
+ fRawData
->fAnyCaseTrie
, fRawData
->fAnyCaseTrieLength
, NULL
, &status
);
651 if (fLowerCaseTrie
== NULL
&& fRawData
->fLowerCaseTrie
!= 0) {
652 fLowerCaseTrie
= utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS
,
653 (char *)fRawData
+ fRawData
->fLowerCaseTrie
, fRawData
->fLowerCaseTrieLength
, NULL
, &status
);
656 if (fRawData
->fScriptSets
!= 0) {
657 fScriptSets
= (ScriptSet
*)((char *)fRawData
+ fRawData
->fScriptSets
);
662 SpoofData::~SpoofData() {
663 utrie2_close(fAnyCaseTrie
);
665 utrie2_close(fLowerCaseTrie
);
666 fLowerCaseTrie
= NULL
;
678 void SpoofData::removeReference() {
679 if (umtx_atomic_dec(&fRefCount
) == 0) {
685 SpoofData
*SpoofData::addReference() {
686 umtx_atomic_inc(&fRefCount
);
691 void *SpoofData::reserveSpace(int32_t numBytes
, UErrorCode
&status
) {
692 if (U_FAILURE(status
)) {
697 status
= U_INTERNAL_PROGRAM_ERROR
;
701 numBytes
= (numBytes
+ 15) & ~15; // Round up to a multiple of 16
702 uint32_t returnOffset
= fMemLimit
;
703 fMemLimit
+= numBytes
;
704 fRawData
= static_cast<SpoofDataHeader
*>(uprv_realloc(fRawData
, fMemLimit
));
705 fRawData
->fLength
= fMemLimit
;
706 uprv_memset((char *)fRawData
+ returnOffset
, 0, numBytes
);
708 return (char *)fRawData
+ returnOffset
;
716 //-----------------------------------------------------------------------------
718 // uspoof_swap - byte swap and char encoding swap of spoof data
720 //-----------------------------------------------------------------------------
721 U_CAPI
int32_t U_EXPORT2
722 uspoof_swap(const UDataSwapper
*ds
, const void *inData
, int32_t length
, void *outData
,
723 UErrorCode
*status
) {
725 if (status
== NULL
|| U_FAILURE(*status
)) {
728 if(ds
==NULL
|| inData
==NULL
|| length
<-1 || (length
>0 && outData
==NULL
)) {
729 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
734 // Check that the data header is for spoof data.
735 // (Header contents are defined in gencfu.cpp)
737 const UDataInfo
*pInfo
= (const UDataInfo
*)((const char *)inData
+4);
738 if(!( pInfo
->dataFormat
[0]==0x43 && /* dataFormat="Cfu " */
739 pInfo
->dataFormat
[1]==0x66 &&
740 pInfo
->dataFormat
[2]==0x75 &&
741 pInfo
->dataFormat
[3]==0x20 &&
742 pInfo
->formatVersion
[0]==1 )) {
743 udata_printError(ds
, "uspoof_swap(): data format %02x.%02x.%02x.%02x "
744 "(format version %02x %02x %02x %02x) is not recognized\n",
745 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
746 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
747 pInfo
->formatVersion
[0], pInfo
->formatVersion
[1],
748 pInfo
->formatVersion
[2], pInfo
->formatVersion
[3]);
749 *status
=U_UNSUPPORTED_ERROR
;
754 // Swap the data header. (This is the generic ICU Data Header, not the uspoof Specific
755 // header). This swap also conveniently gets us
756 // the size of the ICU d.h., which lets us locate the start
757 // of the uspoof specific data.
759 int32_t headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, status
);
763 // Get the Spoof Data Header, and check that it appears to be OK.
766 const uint8_t *inBytes
=(const uint8_t *)inData
+headerSize
;
767 SpoofDataHeader
*spoofDH
= (SpoofDataHeader
*)inBytes
;
768 if (ds
->readUInt32(spoofDH
->fMagic
) != USPOOF_MAGIC
||
769 ds
->readUInt32(spoofDH
->fLength
) < sizeof(SpoofDataHeader
))
771 udata_printError(ds
, "uspoof_swap(): Spoof Data header is invalid.\n");
772 *status
=U_UNSUPPORTED_ERROR
;
777 // Preflight operation? Just return the size
779 int32_t spoofDataLength
= ds
->readUInt32(spoofDH
->fLength
);
780 int32_t totalSize
= headerSize
+ spoofDataLength
;
786 // Check that length passed in is consistent with length from Spoof data header.
788 if (length
< totalSize
) {
789 udata_printError(ds
, "uspoof_swap(): too few bytes (%d after ICU Data header) for spoof data.\n",
791 *status
=U_INDEX_OUTOFBOUNDS_ERROR
;
797 // Swap the Data. Do the data itself first, then the Spoof Data Header, because
798 // we need to reference the header to locate the data, and an
799 // inplace swap of the header leaves it unusable.
801 uint8_t *outBytes
= (uint8_t *)outData
+ headerSize
;
802 SpoofDataHeader
*outputDH
= (SpoofDataHeader
*)outBytes
;
804 int32_t sectionStart
;
805 int32_t sectionLength
;
808 // If not swapping in place, zero out the output buffer before starting.
809 // Gaps may exist between the individual sections, and these must be zeroed in
810 // the output buffer. The simplest way to do that is to just zero the whole thing.
812 if (inBytes
!= outBytes
) {
813 uprv_memset(outBytes
, 0, spoofDataLength
);
816 // Confusables Keys Section (fCFUKeys)
817 sectionStart
= ds
->readUInt32(spoofDH
->fCFUKeys
);
818 sectionLength
= ds
->readUInt32(spoofDH
->fCFUKeysSize
) * 4;
819 ds
->swapArray32(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
821 // String Index Section
822 sectionStart
= ds
->readUInt32(spoofDH
->fCFUStringIndex
);
823 sectionLength
= ds
->readUInt32(spoofDH
->fCFUStringIndexSize
) * 2;
824 ds
->swapArray16(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
826 // String Table Section
827 sectionStart
= ds
->readUInt32(spoofDH
->fCFUStringTable
);
828 sectionLength
= ds
->readUInt32(spoofDH
->fCFUStringTableLen
) * 2;
829 ds
->swapArray16(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
831 // String Lengths Section
832 sectionStart
= ds
->readUInt32(spoofDH
->fCFUStringLengths
);
833 sectionLength
= ds
->readUInt32(spoofDH
->fCFUStringLengthsSize
) * 4;
834 ds
->swapArray16(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
837 sectionStart
= ds
->readUInt32(spoofDH
->fAnyCaseTrie
);
838 sectionLength
= ds
->readUInt32(spoofDH
->fAnyCaseTrieLength
);
839 utrie2_swap(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
842 sectionStart
= ds
->readUInt32(spoofDH
->fLowerCaseTrie
);
843 sectionLength
= ds
->readUInt32(spoofDH
->fLowerCaseTrieLength
);
844 utrie2_swap(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
846 // Script Sets. The data is an array of int32_t
847 sectionStart
= ds
->readUInt32(spoofDH
->fScriptSets
);
848 sectionLength
= ds
->readUInt32(spoofDH
->fScriptSetsLength
) * sizeof(ScriptSet
);
849 ds
->swapArray32(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
);
851 // And, last, swap the header itself.
852 // int32_t fMagic // swap this
853 // uint8_t fFormatVersion[4] // Do not swap this, just copy
854 // int32_t fLength and all the rest // Swap the rest, all is 32 bit stuff.
856 uint32_t magic
= ds
->readUInt32(spoofDH
->fMagic
);
857 ds
->writeUInt32((uint32_t *)&outputDH
->fMagic
, magic
);
859 if (outputDH
->fFormatVersion
!= spoofDH
->fFormatVersion
) {
860 uprv_memcpy(outputDH
->fFormatVersion
, spoofDH
->fFormatVersion
, sizeof(spoofDH
->fFormatVersion
));
862 // swap starting at fLength
863 ds
->swapArray32(ds
, &spoofDH
->fLength
, sizeof(SpoofDataHeader
)-8 /* minus magic and fFormatVersion[4] */, &outputDH
->fLength
, status
);