2 ********************************************************************** 
   3 *   Copyright (C) 2008-2013, International Business Machines 
   4 *   Corporation and others.  All Rights Reserved. 
   5 ********************************************************************** 
   8 #include "unicode/utypes.h" 
   9 #include "unicode/uspoof.h" 
  10 #include "unicode/uchar.h" 
  11 #include "unicode/uniset.h" 
  12 #include "unicode/utf16.h" 
  16 #include "identifier_info.h" 
  17 #include "scriptset.h" 
  22 #include "uspoof_impl.h" 
  24 #if !UCONFIG_NO_NORMALIZATION 
  29 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SpoofImpl
) 
  31 SpoofImpl::SpoofImpl(SpoofData 
*data
, UErrorCode 
&status
) : 
  32         fMagic(0), fChecks(USPOOF_ALL_CHECKS
), fSpoofData(NULL
), fAllowedCharsSet(NULL
) ,  
  33         fAllowedLocales(NULL
), fCachedIdentifierInfo(NULL
) { 
  34     if (U_FAILURE(status
)) { 
  38     fRestrictionLevel 
= USPOOF_HIGHLY_RESTRICTIVE
; 
  40     UnicodeSet 
*allowedCharsSet 
= new UnicodeSet(0, 0x10ffff); 
  41     allowedCharsSet
->freeze(); 
  42     fAllowedCharsSet 
= allowedCharsSet
; 
  43     fAllowedLocales  
= uprv_strdup(""); 
  44     if (fAllowedCharsSet 
== NULL 
|| fAllowedLocales 
== NULL
) { 
  45         status 
= U_MEMORY_ALLOCATION_ERROR
; 
  48     fMagic 
= USPOOF_MAGIC
; 
  52 SpoofImpl::SpoofImpl() : 
  53         fMagic(USPOOF_MAGIC
), fChecks(USPOOF_ALL_CHECKS
), fSpoofData(NULL
), fAllowedCharsSet(NULL
) ,  
  54         fAllowedLocales(NULL
), fCachedIdentifierInfo(NULL
) { 
  55     UnicodeSet 
*allowedCharsSet 
= new UnicodeSet(0, 0x10ffff); 
  56     allowedCharsSet
->freeze(); 
  57     fAllowedCharsSet 
= allowedCharsSet
; 
  58     fAllowedLocales  
= uprv_strdup(""); 
  59     fRestrictionLevel 
= USPOOF_HIGHLY_RESTRICTIVE
; 
  63 // Copy Constructor, used by the user level clone() function. 
  64 SpoofImpl::SpoofImpl(const SpoofImpl 
&src
, UErrorCode 
&status
)  : 
  65         fMagic(0), fChecks(USPOOF_ALL_CHECKS
), fSpoofData(NULL
), fAllowedCharsSet(NULL
) ,  
  66         fAllowedLocales(NULL
), fCachedIdentifierInfo(NULL
) { 
  67     if (U_FAILURE(status
)) { 
  71     fChecks 
= src
.fChecks
; 
  72     if (src
.fSpoofData 
!= NULL
) { 
  73         fSpoofData 
= src
.fSpoofData
->addReference(); 
  75     fAllowedCharsSet 
= static_cast<const UnicodeSet 
*>(src
.fAllowedCharsSet
->clone()); 
  76     if (fAllowedCharsSet 
== NULL
) { 
  77         status 
= U_MEMORY_ALLOCATION_ERROR
; 
  79     fAllowedLocales 
= uprv_strdup(src
.fAllowedLocales
); 
  80     fRestrictionLevel 
= src
.fRestrictionLevel
; 
  83 SpoofImpl::~SpoofImpl() { 
  84     fMagic 
= 0;                // head off application errors by preventing use of 
  85                                //    of deleted objects. 
  86     if (fSpoofData 
!= NULL
) { 
  87         fSpoofData
->removeReference();   // Will delete if refCount goes to zero. 
  89     delete fAllowedCharsSet
; 
  90     uprv_free((void *)fAllowedLocales
); 
  91     delete fCachedIdentifierInfo
; 
  95 //  Incoming parameter check on Status and the SpoofChecker object 
  96 //    received from the C API. 
  98 const SpoofImpl 
*SpoofImpl::validateThis(const USpoofChecker 
*sc
, UErrorCode 
&status
) { 
  99     if (U_FAILURE(status
)) { 
 103         status 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 106     SpoofImpl 
*This 
= (SpoofImpl 
*)sc
; 
 107     if (This
->fMagic 
!= USPOOF_MAGIC 
|| 
 108         This
->fSpoofData 
== NULL
) { 
 109         status 
= U_INVALID_FORMAT_ERROR
; 
 112     if (!SpoofData::validateDataVersion(This
->fSpoofData
->fRawData
, status
)) { 
 118 SpoofImpl 
*SpoofImpl::validateThis(USpoofChecker 
*sc
, UErrorCode 
&status
) { 
 119     return const_cast<SpoofImpl 
*> 
 120         (SpoofImpl::validateThis(const_cast<const USpoofChecker 
*>(sc
), status
)); 
 125 //-------------------------------------------------------------------------------------- 
 127 //  confusableLookup()    This is the heart of the confusable skeleton generation 
 130 //                        Given a source character, produce the corresponding 
 131 //                        replacement character(s), appending them to the dest string. 
 133 //--------------------------------------------------------------------------------------- 
 134 int32_t SpoofImpl::confusableLookup(UChar32 inChar
, int32_t tableMask
, UnicodeString 
&dest
) const { 
 136     // Binary search the spoof data key table for the inChar 
 137     int32_t  *low   
= fSpoofData
->fCFUKeys
; 
 139     int32_t  *limit 
= low 
+ fSpoofData
->fRawData
->fCFUKeysSize
; 
 142         int32_t delta 
= ((int32_t)(limit
-low
))/2; 
 144         midc 
= *mid 
& 0x1fffff; 
 145         if (inChar 
== midc
) { 
 147         } else if (inChar 
< midc
) { 
 152     } while (low 
< limit
-1); 
 154     midc 
= *mid 
& 0x1fffff; 
 155     if (inChar 
!= midc
) { 
 156         // Char not found.  It maps to itself. 
 162     int32_t keyFlags 
= *mid 
& 0xff000000; 
 163     if ((keyFlags 
& tableMask
) == 0) { 
 164         // We found the right key char, but the entry doesn't pertain to the 
 165         //  table we need.  See if there is an adjacent key that does 
 166         if (keyFlags 
& USPOOF_KEY_MULTIPLE_VALUES
) { 
 168             for (altMid 
= mid
-1; (*altMid
&0x00ffffff) == inChar
; altMid
--) { 
 169                 keyFlags 
= *altMid 
& 0xff000000; 
 170                 if (keyFlags 
& tableMask
) { 
 175             for (altMid 
= mid
+1; (*altMid
&0x00ffffff) == inChar
; altMid
++) { 
 176                 keyFlags 
= *altMid 
& 0xff000000; 
 177                 if (keyFlags 
& tableMask
) { 
 183         // No key entry for this char & table. 
 184         // The input char maps to itself. 
 191     int32_t  stringLen 
= USPOOF_KEY_LENGTH_FIELD(keyFlags
) + 1; 
 192     int32_t keyTableIndex 
= (int32_t)(mid 
- fSpoofData
->fCFUKeys
); 
 194     // Value is either a UChar  (for strings of length 1) or 
 195     //                 an index into the string table (for longer strings) 
 196     uint16_t value 
= fSpoofData
->fCFUValues
[keyTableIndex
]; 
 197     if (stringLen 
== 1) { 
 198         dest
.append((UChar
)value
); 
 202     // String length of 4 from the above lookup is used for all strings of length >= 4. 
 203     // For these, get the real length from the string lengths table, 
 204     //   which maps string table indexes to lengths. 
 205     //   All strings of the same length are stored contiguously in the string table. 
 206     //   'value' from the lookup above is the starting index for the desired string. 
 209     if (stringLen 
== 4) { 
 210         int32_t stringLengthsLimit 
= fSpoofData
->fRawData
->fCFUStringLengthsSize
; 
 211         for (ix 
= 0; ix 
< stringLengthsLimit
; ix
++) { 
 212             if (fSpoofData
->fCFUStringLengths
[ix
].fLastString 
>= value
) { 
 213                 stringLen 
= fSpoofData
->fCFUStringLengths
[ix
].fStrLength
; 
 217         U_ASSERT(ix 
< stringLengthsLimit
); 
 220     U_ASSERT(value 
+ stringLen 
<= fSpoofData
->fRawData
->fCFUStringTableLen
); 
 221     UChar 
*src 
= &fSpoofData
->fCFUStrings
[value
]; 
 222     dest
.append(src
, stringLen
); 
 227 //--------------------------------------------------------------------------------------- 
 229 //  wholeScriptCheck() 
 231 //      Input text is already normalized to NFD 
 232 //      Return the set of scripts, each of which can represent something that is 
 233 //             confusable with the input text.  The script of the input text 
 234 //             is included; input consisting of characters from a single script will 
 235 //             always produce a result consisting of a set containing that script. 
 237 //--------------------------------------------------------------------------------------- 
 238 void SpoofImpl::wholeScriptCheck( 
 239         const UnicodeString 
&text
, ScriptSet 
*result
, UErrorCode 
&status
) const { 
 242         (fChecks 
& USPOOF_ANY_CASE
) ? fSpoofData
->fAnyCaseTrie 
: fSpoofData
->fLowerCaseTrie
; 
 244     int32_t length 
= text
.length(); 
 245     for (int32_t inputIdx
=0; inputIdx 
< length
;) { 
 246         UChar32 c 
= text
.char32At(inputIdx
); 
 247         inputIdx 
+= U16_LENGTH(c
); 
 248         uint32_t index 
= utrie2_get32(table
, c
); 
 250             // No confusables in another script for this char. 
 251             // TODO:  we should change the data to have sets with just the single script 
 252             //        bit for the script of this char.  Gets rid of this special case. 
 253             //        Until then, grab the script from the char and intersect it with the set. 
 254             UScriptCode cpScript 
= uscript_getScript(c
, &status
); 
 255             U_ASSERT(cpScript 
> USCRIPT_INHERITED
); 
 256             result
->intersect(cpScript
, status
); 
 257         } else if (index 
== 1) { 
 258             // Script == Common or Inherited.  Nothing to do. 
 260             result
->intersect(fSpoofData
->fScriptSets
[index
]); 
 266 void SpoofImpl::setAllowedLocales(const char *localesList
, UErrorCode 
&status
) { 
 267     UnicodeSet    allowedChars
; 
 268     UnicodeSet    
*tmpSet 
= NULL
; 
 269     const char    *locStart 
= localesList
; 
 270     const char    *locEnd 
= NULL
; 
 271     const char    *localesListEnd 
= localesList 
+ uprv_strlen(localesList
); 
 272     int32_t        localeListCount 
= 0;   // Number of locales provided by caller. 
 274     // Loop runs once per locale from the localesList, a comma separated list of locales. 
 276         locEnd 
= uprv_strchr(locStart
, ','); 
 277         if (locEnd 
== NULL
) { 
 278             locEnd 
= localesListEnd
; 
 280         while (*locStart 
== ' ') { 
 283         const char *trimmedEnd 
= locEnd
-1; 
 284         while (trimmedEnd 
> locStart 
&& *trimmedEnd 
== ' ') { 
 287         if (trimmedEnd 
<= locStart
) { 
 290         const char *locale 
= uprv_strndup(locStart
, (int32_t)(trimmedEnd 
+ 1 - locStart
)); 
 293         // We have one locale from the locales list. 
 294         // Add the script chars for this locale to the accumulating set of allowed chars. 
 295         // If the locale is no good, we will be notified back via status. 
 296         addScriptChars(locale
, &allowedChars
, status
); 
 297         uprv_free((void *)locale
); 
 298         if (U_FAILURE(status
)) { 
 301         locStart 
= locEnd 
+ 1; 
 302     } while (locStart 
< localesListEnd
); 
 304     // If our caller provided an empty list of locales, we disable the allowed characters checking 
 305     if (localeListCount 
== 0) { 
 306         uprv_free((void *)fAllowedLocales
); 
 307         fAllowedLocales 
= uprv_strdup(""); 
 308         tmpSet 
= new UnicodeSet(0, 0x10ffff); 
 309         if (fAllowedLocales 
== NULL 
|| tmpSet 
== NULL
) { 
 310             status 
= U_MEMORY_ALLOCATION_ERROR
; 
 314         delete fAllowedCharsSet
; 
 315         fAllowedCharsSet 
= tmpSet
; 
 316         fChecks 
&= ~USPOOF_CHAR_LIMIT
; 
 321     // Add all common and inherited characters to the set of allowed chars. 
 323     tempSet
.applyIntPropertyValue(UCHAR_SCRIPT
, USCRIPT_COMMON
, status
); 
 324     allowedChars
.addAll(tempSet
); 
 325     tempSet
.applyIntPropertyValue(UCHAR_SCRIPT
, USCRIPT_INHERITED
, status
); 
 326     allowedChars
.addAll(tempSet
); 
 328     // If anything went wrong, we bail out without changing 
 329     // the state of the spoof checker. 
 330     if (U_FAILURE(status
)) { 
 334     // Store the updated spoof checker state. 
 335     tmpSet 
= static_cast<UnicodeSet 
*>(allowedChars
.clone()); 
 336     const char *tmpLocalesList 
= uprv_strdup(localesList
); 
 337     if (tmpSet 
== NULL 
|| tmpLocalesList 
== NULL
) { 
 338         status 
= U_MEMORY_ALLOCATION_ERROR
; 
 341     uprv_free((void *)fAllowedLocales
); 
 342     fAllowedLocales 
= tmpLocalesList
; 
 344     delete fAllowedCharsSet
; 
 345     fAllowedCharsSet 
= tmpSet
; 
 346     fChecks 
|= USPOOF_CHAR_LIMIT
; 
 350 const char * SpoofImpl::getAllowedLocales(UErrorCode 
&/*status*/) { 
 351     return fAllowedLocales
; 
 355 // Given a locale (a language), add all the characters from all of the scripts used with that language 
 356 // to the allowedChars UnicodeSet 
 358 void SpoofImpl::addScriptChars(const char *locale
, UnicodeSet 
*allowedChars
, UErrorCode 
&status
) { 
 359     UScriptCode scripts
[30]; 
 361     int32_t numScripts 
= uscript_getCode(locale
, scripts
, sizeof(scripts
)/sizeof(UScriptCode
), &status
); 
 362     if (U_FAILURE(status
)) { 
 365     if (status 
== U_USING_DEFAULT_WARNING
) { 
 366         status 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 371     for (i
=0; i
<numScripts
; i
++) { 
 372         tmpSet
.applyIntPropertyValue(UCHAR_SCRIPT
, scripts
[i
], status
); 
 373         allowedChars
->addAll(tmpSet
); 
 378 // Convert a text format hex number.  Utility function used by builder code.  Static. 
 379 // Input: UChar *string text.  Output: a UChar32 
 380 // Input has been pre-checked, and will have no non-hex chars. 
 381 // The number must fall in the code point range of 0..0x10ffff 
 383 UChar32 
SpoofImpl::ScanHex(const UChar 
*s
, int32_t start
, int32_t limit
, UErrorCode 
&status
) { 
 384     if (U_FAILURE(status
)) { 
 387     U_ASSERT(limit
-start 
> 0); 
 390     for (i
=start
; i
<limit
; i
++) { 
 391         int digitVal 
= s
[i
] - 0x30; 
 393             digitVal 
= 0xa + (s
[i
] - 0x41);  // Upper Case 'A' 
 396             digitVal 
= 0xa + (s
[i
] - 0x61);  // Lower Case 'a' 
 398         U_ASSERT(digitVal 
<= 0xf); 
 402     if (val 
> 0x10ffff) { 
 403         status 
= U_PARSE_ERROR
; 
 409 // IdentifierInfo Cache. IdentifierInfo objects are somewhat expensive to create. 
 410 //                       Maintain a one-element cache, which is sufficient to avoid repeatedly 
 411 //                       creating new ones unless we get multi-thread concurrency in spoof 
 412 //                       check operations, which should be statistically uncommon. 
 414 // These functions are used in place of new & delete of an IdentifierInfo. 
 415 // They will recycle the IdentifierInfo when possible. 
 416 // They are logically const, and used within const functions that must be thread safe. 
 417 IdentifierInfo 
*SpoofImpl::getIdentifierInfo(UErrorCode 
&status
) const { 
 418     IdentifierInfo 
*returnIdInfo 
= NULL
; 
 419     if (U_FAILURE(status
)) { 
 422     SpoofImpl 
*nonConstThis 
= const_cast<SpoofImpl 
*>(this); 
 425         returnIdInfo 
= nonConstThis
->fCachedIdentifierInfo
; 
 426         nonConstThis
->fCachedIdentifierInfo 
= NULL
; 
 428     if (returnIdInfo 
== NULL
) { 
 429         returnIdInfo 
= new IdentifierInfo(status
); 
 430         if (U_SUCCESS(status
) && returnIdInfo 
== NULL
) { 
 431             status 
= U_MEMORY_ALLOCATION_ERROR
; 
 433         if (U_FAILURE(status
) && returnIdInfo 
!= NULL
) { 
 442 void SpoofImpl::releaseIdentifierInfo(IdentifierInfo 
*idInfo
) const { 
 443     if (idInfo 
!= NULL
) { 
 444         SpoofImpl 
*nonConstThis 
= const_cast<SpoofImpl 
*>(this); 
 447             if (nonConstThis
->fCachedIdentifierInfo 
== NULL
) { 
 448                 nonConstThis
->fCachedIdentifierInfo 
= idInfo
; 
 459 //---------------------------------------------------------------------------------------------- 
 461 //   class SpoofData Implementation 
 463 //---------------------------------------------------------------------------------------------- 
 466 UBool 
SpoofData::validateDataVersion(const SpoofDataHeader 
*rawData
, UErrorCode 
&status
) { 
 467     if (U_FAILURE(status
) || 
 469         rawData
->fMagic 
!= USPOOF_MAGIC 
|| 
 470         rawData
->fFormatVersion
[0] > 1 || 
 471         rawData
->fFormatVersion
[1] > 0) { 
 472             status 
= U_INVALID_FORMAT_ERROR
; 
 479 //  SpoofData::getDefault() - return a wrapper around the spoof data that is 
 480 //                           baked into the default ICU data. 
 482 SpoofData 
*SpoofData::getDefault(UErrorCode 
&status
) { 
 483     // TODO:  Cache it.  Lazy create, keep until cleanup. 
 485     UDataMemory 
*udm 
= udata_open(NULL
, "cfu", "confusables", &status
); 
 486     if (U_FAILURE(status
)) { 
 489     SpoofData 
*This 
= new SpoofData(udm
, status
); 
 490     if (U_FAILURE(status
)) { 
 495         status 
= U_MEMORY_ALLOCATION_ERROR
; 
 501 SpoofData::SpoofData(UDataMemory 
*udm
, UErrorCode 
&status
) 
 504     if (U_FAILURE(status
)) { 
 507     fRawData 
= reinterpret_cast<SpoofDataHeader 
*> 
 508                    ((char *)(udm
->pHeader
) + udm
->pHeader
->dataHeader
.headerSize
); 
 510     validateDataVersion(fRawData
, status
); 
 515 SpoofData::SpoofData(const void *data
, int32_t length
, UErrorCode 
&status
) 
 518     if (U_FAILURE(status
)) { 
 521     if ((size_t)length 
< sizeof(SpoofDataHeader
)) { 
 522         status 
= U_INVALID_FORMAT_ERROR
; 
 525     void *ncData 
= const_cast<void *>(data
); 
 526     fRawData 
= static_cast<SpoofDataHeader 
*>(ncData
); 
 527     if (length 
< fRawData
->fLength
) { 
 528         status 
= U_INVALID_FORMAT_ERROR
; 
 531     validateDataVersion(fRawData
, status
); 
 536 // Spoof Data constructor for use from data builder. 
 537 //   Initializes a new, empty data area that will be populated later. 
 538 SpoofData::SpoofData(UErrorCode 
&status
) { 
 540     if (U_FAILURE(status
)) { 
 546     // The spoof header should already be sized to be a multiple of 16 bytes. 
 547     // Just in case it's not, round it up. 
 548     uint32_t initialSize 
= (sizeof(SpoofDataHeader
) + 15) & ~15; 
 549     U_ASSERT(initialSize 
== sizeof(SpoofDataHeader
)); 
 551     fRawData 
= static_cast<SpoofDataHeader 
*>(uprv_malloc(initialSize
)); 
 552     fMemLimit 
= initialSize
; 
 553     if (fRawData 
== NULL
) { 
 554         status 
= U_MEMORY_ALLOCATION_ERROR
; 
 557     uprv_memset(fRawData
, 0, initialSize
); 
 559     fRawData
->fMagic 
= USPOOF_MAGIC
; 
 560     fRawData
->fFormatVersion
[0] = 1; 
 561     fRawData
->fFormatVersion
[1] = 0; 
 562     fRawData
->fFormatVersion
[2] = 0; 
 563     fRawData
->fFormatVersion
[3] = 0; 
 567 // reset() - initialize all fields. 
 568 //           Should be updated if any new fields are added. 
 569 //           Called by constructors to put things in a known initial state. 
 570 void SpoofData::reset() { 
 578    fCFUStringLengths 
= NULL
; 
 581    fLowerCaseTrie 
= NULL
; 
 586 //  SpoofData::initPtrs() 
 587 //            Initialize the pointers to the various sections of the raw data. 
 589 //            This function is used both during the Trie building process (multiple 
 590 //            times, as the individual data sections are added), and 
 591 //            during the opening of a Spoof Checker from prebuilt data. 
 593 //            The pointers for non-existent data sections (identified by an offset of 0) 
 596 //            Note:  During building the data, adding each new data section 
 597 //            reallocs the raw data area, which likely relocates it, which 
 598 //            in turn requires reinitializing all of the pointers into it, hence 
 599 //            multiple calls to this function during building. 
 601 void SpoofData::initPtrs(UErrorCode 
&status
) { 
 604     fCFUStringLengths 
= NULL
; 
 606     if (U_FAILURE(status
)) { 
 609     if (fRawData
->fCFUKeys 
!= 0) { 
 610         fCFUKeys 
= (int32_t *)((char *)fRawData 
+ fRawData
->fCFUKeys
); 
 612     if (fRawData
->fCFUStringIndex 
!= 0) { 
 613         fCFUValues 
= (uint16_t *)((char *)fRawData 
+ fRawData
->fCFUStringIndex
); 
 615     if (fRawData
->fCFUStringLengths 
!= 0) { 
 616         fCFUStringLengths 
= (SpoofStringLengthsElement 
*)((char *)fRawData 
+ fRawData
->fCFUStringLengths
); 
 618     if (fRawData
->fCFUStringTable 
!= 0) { 
 619         fCFUStrings 
= (UChar 
*)((char *)fRawData 
+ fRawData
->fCFUStringTable
); 
 622     if (fAnyCaseTrie 
==  NULL 
&& fRawData
->fAnyCaseTrie 
!= 0) { 
 623         fAnyCaseTrie 
= utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS
, 
 624             (char *)fRawData 
+ fRawData
->fAnyCaseTrie
, fRawData
->fAnyCaseTrieLength
, NULL
, &status
); 
 626     if (fLowerCaseTrie 
==  NULL 
&& fRawData
->fLowerCaseTrie 
!= 0) { 
 627         fLowerCaseTrie 
= utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS
, 
 628             (char *)fRawData 
+ fRawData
->fLowerCaseTrie
, fRawData
->fLowerCaseTrieLength
, NULL
, &status
); 
 631     if (fRawData
->fScriptSets 
!= 0) { 
 632         fScriptSets 
= (ScriptSet 
*)((char *)fRawData 
+ fRawData
->fScriptSets
); 
 637 SpoofData::~SpoofData() { 
 638     utrie2_close(fAnyCaseTrie
); 
 640     utrie2_close(fLowerCaseTrie
); 
 641     fLowerCaseTrie 
= NULL
; 
 653 void SpoofData::removeReference() { 
 654     if (umtx_atomic_dec(&fRefCount
) == 0) { 
 660 SpoofData 
*SpoofData::addReference() { 
 661     umtx_atomic_inc(&fRefCount
); 
 666 void *SpoofData::reserveSpace(int32_t numBytes
,  UErrorCode 
&status
) { 
 667     if (U_FAILURE(status
)) { 
 672         status 
= U_INTERNAL_PROGRAM_ERROR
; 
 676     numBytes 
= (numBytes 
+ 15) & ~15;   // Round up to a multiple of 16 
 677     uint32_t returnOffset 
= fMemLimit
; 
 678     fMemLimit 
+= numBytes
; 
 679     fRawData 
= static_cast<SpoofDataHeader 
*>(uprv_realloc(fRawData
, fMemLimit
)); 
 680     fRawData
->fLength 
= fMemLimit
; 
 681     uprv_memset((char *)fRawData 
+ returnOffset
, 0, numBytes
); 
 683     return (char *)fRawData 
+ returnOffset
; 
 691 //----------------------------------------------------------------------------- 
 693 //  uspoof_swap   -  byte swap and char encoding swap of spoof data 
 695 //----------------------------------------------------------------------------- 
 696 U_CAPI 
int32_t U_EXPORT2
 
 697 uspoof_swap(const UDataSwapper 
*ds
, const void *inData
, int32_t length
, void *outData
, 
 698            UErrorCode 
*status
) { 
 700     if (status 
== NULL 
|| U_FAILURE(*status
)) { 
 703     if(ds
==NULL 
|| inData
==NULL 
|| length
<-1 || (length
>0 && outData
==NULL
)) { 
 704         *status
=U_ILLEGAL_ARGUMENT_ERROR
; 
 709     //  Check that the data header is for spoof data. 
 710     //    (Header contents are defined in gencfu.cpp) 
 712     const UDataInfo 
*pInfo 
= (const UDataInfo 
*)((const char *)inData
+4); 
 713     if(!(  pInfo
->dataFormat
[0]==0x43 &&   /* dataFormat="Cfu " */ 
 714            pInfo
->dataFormat
[1]==0x66 && 
 715            pInfo
->dataFormat
[2]==0x75 && 
 716            pInfo
->dataFormat
[3]==0x20 && 
 717            pInfo
->formatVersion
[0]==1  )) { 
 718         udata_printError(ds
, "uspoof_swap(): data format %02x.%02x.%02x.%02x " 
 719                              "(format version %02x %02x %02x %02x) is not recognized\n", 
 720                          pInfo
->dataFormat
[0], pInfo
->dataFormat
[1], 
 721                          pInfo
->dataFormat
[2], pInfo
->dataFormat
[3], 
 722                          pInfo
->formatVersion
[0], pInfo
->formatVersion
[1], 
 723                          pInfo
->formatVersion
[2], pInfo
->formatVersion
[3]); 
 724         *status
=U_UNSUPPORTED_ERROR
; 
 729     // Swap the data header.  (This is the generic ICU Data Header, not the uspoof Specific 
 730     //                         header).  This swap also conveniently gets us 
 731     //                         the size of the ICU d.h., which lets us locate the start 
 732     //                         of the uspoof specific data. 
 734     int32_t headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, status
); 
 738     // Get the Spoof Data Header, and check that it appears to be OK. 
 741     const uint8_t   *inBytes 
=(const uint8_t *)inData
+headerSize
; 
 742     SpoofDataHeader 
*spoofDH 
= (SpoofDataHeader 
*)inBytes
; 
 743     if (ds
->readUInt32(spoofDH
->fMagic
)   != USPOOF_MAGIC 
|| 
 744         ds
->readUInt32(spoofDH
->fLength
)  <  sizeof(SpoofDataHeader
))  
 746         udata_printError(ds
, "uspoof_swap(): Spoof Data header is invalid.\n"); 
 747         *status
=U_UNSUPPORTED_ERROR
; 
  752     // Preflight operation?  Just return the size 
 754     int32_t spoofDataLength 
= ds
->readUInt32(spoofDH
->fLength
); 
 755     int32_t totalSize 
= headerSize 
+ spoofDataLength
; 
 761     // Check that length passed in is consistent with length from Spoof data header. 
 763     if (length 
< totalSize
) { 
 764         udata_printError(ds
, "uspoof_swap(): too few bytes (%d after ICU Data header) for spoof data.\n", 
 766         *status
=U_INDEX_OUTOFBOUNDS_ERROR
; 
 772     // Swap the Data.  Do the data itself first, then the Spoof Data Header, because 
 773     //                 we need to reference the header to locate the data, and an 
 774     //                 inplace swap of the header leaves it unusable. 
 776     uint8_t          *outBytes 
= (uint8_t *)outData 
+ headerSize
; 
 777     SpoofDataHeader  
*outputDH 
= (SpoofDataHeader 
*)outBytes
; 
 779     int32_t   sectionStart
; 
 780     int32_t   sectionLength
; 
 783     // If not swapping in place, zero out the output buffer before starting. 
 784     //    Gaps may exist between the individual sections, and these must be zeroed in 
 785     //    the output buffer.  The simplest way to do that is to just zero the whole thing. 
 787     if (inBytes 
!= outBytes
) { 
 788         uprv_memset(outBytes
, 0, spoofDataLength
); 
 791     // Confusables Keys Section   (fCFUKeys) 
 792     sectionStart  
= ds
->readUInt32(spoofDH
->fCFUKeys
); 
 793     sectionLength 
= ds
->readUInt32(spoofDH
->fCFUKeysSize
) * 4; 
 794     ds
->swapArray32(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
); 
 796     // String Index Section 
 797     sectionStart  
= ds
->readUInt32(spoofDH
->fCFUStringIndex
); 
 798     sectionLength 
= ds
->readUInt32(spoofDH
->fCFUStringIndexSize
) * 2; 
 799     ds
->swapArray16(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
); 
 801     // String Table Section 
 802     sectionStart  
= ds
->readUInt32(spoofDH
->fCFUStringTable
); 
 803     sectionLength 
= ds
->readUInt32(spoofDH
->fCFUStringTableLen
) * 2; 
 804     ds
->swapArray16(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
); 
 806     // String Lengths Section 
 807     sectionStart  
= ds
->readUInt32(spoofDH
->fCFUStringLengths
); 
 808     sectionLength 
= ds
->readUInt32(spoofDH
->fCFUStringLengthsSize
) * 4; 
 809     ds
->swapArray16(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
); 
 812     sectionStart  
= ds
->readUInt32(spoofDH
->fAnyCaseTrie
); 
 813     sectionLength 
= ds
->readUInt32(spoofDH
->fAnyCaseTrieLength
); 
 814     utrie2_swap(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
); 
 817     sectionStart  
= ds
->readUInt32(spoofDH
->fLowerCaseTrie
); 
 818     sectionLength 
= ds
->readUInt32(spoofDH
->fLowerCaseTrieLength
); 
 819     utrie2_swap(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
); 
 821     // Script Sets.  The data is an array of int32_t 
 822     sectionStart  
= ds
->readUInt32(spoofDH
->fScriptSets
); 
 823     sectionLength 
= ds
->readUInt32(spoofDH
->fScriptSetsLength
) * sizeof(ScriptSet
); 
 824     ds
->swapArray32(ds
, inBytes
+sectionStart
, sectionLength
, outBytes
+sectionStart
, status
); 
 826     // And, last, swap the header itself. 
 827     //   int32_t   fMagic             // swap this 
 828     //   uint8_t   fFormatVersion[4]  // Do not swap this, just copy 
 829     //   int32_t   fLength and all the rest       // Swap the rest, all is 32 bit stuff. 
 831     uint32_t magic 
= ds
->readUInt32(spoofDH
->fMagic
); 
 832     ds
->writeUInt32((uint32_t *)&outputDH
->fMagic
, magic
); 
 834     if (outputDH
->fFormatVersion 
!= spoofDH
->fFormatVersion
) { 
 835         uprv_memcpy(outputDH
->fFormatVersion
, spoofDH
->fFormatVersion
, sizeof(spoofDH
->fFormatVersion
)); 
 837     // swap starting at fLength 
 838     ds
->swapArray32(ds
, &spoofDH
->fLength
, sizeof(SpoofDataHeader
)-8 /* minus magic and fFormatVersion[4] */, &outputDH
->fLength
, status
);