icuSources/i18n/uspoof_impl.cpp

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 2008-2013, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 */
   7
   8 #include "unicode/utypes.h"
   9 #include "unicode/uspoof.h"
  10 #include "unicode/uchar.h"
  11 #include "unicode/uniset.h"
  12 #include "unicode/utf16.h"
  13 #include "utrie2.h"
  14 #include "cmemory.h"
  15 #include "cstring.h"
  16 #include "identifier_info.h"
  17 #include "scriptset.h"
  18 #include "udatamem.h"
  19 #include "umutex.h"
  20 #include "udataswp.h"
  21 #include "uassert.h"
  22 #include "uspoof_impl.h"
  23
  24 #if !UCONFIG_NO_NORMALIZATION
  25
  26
  27 U_NAMESPACE_BEGIN
  28
  29 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SpoofImpl)
  30
  31 SpoofImpl::SpoofImpl(SpoofData *data, UErrorCode &status) :
  32         fMagic(0), fChecks(USPOOF_ALL_CHECKS), fSpoofData(NULL), fAllowedCharsSet(NULL) ,
  33         fAllowedLocales(NULL), fCachedIdentifierInfo(NULL) {
  34     if (U_FAILURE(status)) {
  35         return;
  36     }
  37     fSpoofData = data;
  38     fRestrictionLevel = USPOOF_HIGHLY_RESTRICTIVE;
  39
  40     UnicodeSet *allowedCharsSet = new UnicodeSet(0, 0x10ffff);
  41     allowedCharsSet->freeze();
  42     fAllowedCharsSet = allowedCharsSet;
  43     fAllowedLocales  = uprv_strdup("");
  44     if (fAllowedCharsSet == NULL || fAllowedLocales == NULL) {
  45         status = U_MEMORY_ALLOCATION_ERROR;
  46         return;
  47     }
  48     fMagic = USPOOF_MAGIC;
  49 }
  50
  51
  52 SpoofImpl::SpoofImpl() :
  53         fMagic(USPOOF_MAGIC), fChecks(USPOOF_ALL_CHECKS), fSpoofData(NULL), fAllowedCharsSet(NULL) ,
  54         fAllowedLocales(NULL), fCachedIdentifierInfo(NULL) {
  55     UnicodeSet *allowedCharsSet = new UnicodeSet(0, 0x10ffff);
  56     allowedCharsSet->freeze();
  57     fAllowedCharsSet = allowedCharsSet;
  58     fAllowedLocales  = uprv_strdup("");
  59     fRestrictionLevel = USPOOF_HIGHLY_RESTRICTIVE;
  60 }
  61
  62
  63 // Copy Constructor, used by the user level clone() function.
  64 SpoofImpl::SpoofImpl(const SpoofImpl &src, UErrorCode &status)  :
  65         fMagic(0), fChecks(USPOOF_ALL_CHECKS), fSpoofData(NULL), fAllowedCharsSet(NULL) ,
  66         fAllowedLocales(NULL), fCachedIdentifierInfo(NULL) {
  67     if (U_FAILURE(status)) {
  68         return;
  69     }
  70     fMagic = src.fMagic;
  71     fChecks = src.fChecks;
  72     if (src.fSpoofData != NULL) {
  73         fSpoofData = src.fSpoofData->addReference();
  74     }
  75     fAllowedCharsSet = static_cast<const UnicodeSet *>(src.fAllowedCharsSet->clone());
  76     if (fAllowedCharsSet == NULL) {
  77         status = U_MEMORY_ALLOCATION_ERROR;
  78     }
  79     fAllowedLocales = uprv_strdup(src.fAllowedLocales);
  80     fRestrictionLevel = src.fRestrictionLevel;
  81 }
  82
  83 SpoofImpl::~SpoofImpl() {
  84     fMagic = 0;                // head off application errors by preventing use of
  85                                //    of deleted objects.
  86     if (fSpoofData != NULL) {
  87         fSpoofData->removeReference();   // Will delete if refCount goes to zero.
  88     }
  89     delete fAllowedCharsSet;
  90     uprv_free((void *)fAllowedLocales);
  91     delete fCachedIdentifierInfo;
  92 }
  93
  94 //
  95 //  Incoming parameter check on Status and the SpoofChecker object
  96 //    received from the C API.
  97 //
  98 const SpoofImpl *SpoofImpl::validateThis(const USpoofChecker *sc, UErrorCode &status) {
  99     if (U_FAILURE(status)) {
 100         return NULL;
 101     }
 102     if (sc == NULL) {
 103         status = U_ILLEGAL_ARGUMENT_ERROR;
 104         return NULL;
 105     }
 106     SpoofImpl *This = (SpoofImpl *)sc;
 107     if (This->fMagic != USPOOF_MAGIC ||
 108         This->fSpoofData == NULL) {
 109         status = U_INVALID_FORMAT_ERROR;
 110         return NULL;
 111     }
 112     if (!SpoofData::validateDataVersion(This->fSpoofData->fRawData, status)) {
 113         return NULL;
 114     }
 115     return This;
 116 }
 117
 118 SpoofImpl *SpoofImpl::validateThis(USpoofChecker *sc, UErrorCode &status) {
 119     return const_cast<SpoofImpl *>
 120         (SpoofImpl::validateThis(const_cast<const USpoofChecker *>(sc), status));
 121 }
 122
 123
 124
 125 //--------------------------------------------------------------------------------------
 126 //
 127 //  confusableLookup()    This is the heart of the confusable skeleton generation
 128 //                        implementation.
 129 //
 130 //                        Given a source character, produce the corresponding
 131 //                        replacement character(s), appending them to the dest string.
 132 //
 133 //---------------------------------------------------------------------------------------
 134 int32_t SpoofImpl::confusableLookup(UChar32 inChar, int32_t tableMask, UnicodeString &dest) const {
 135
 136     // Binary search the spoof data key table for the inChar
 137     int32_t  *low   = fSpoofData->fCFUKeys;
 138     int32_t  *mid   = NULL;
 139     int32_t  *limit = low + fSpoofData->fRawData->fCFUKeysSize;
 140     UChar32   midc;
 141     do {
 142         int32_t delta = ((int32_t)(limit-low))/2;
 143         mid = low + delta;
 144         midc = *mid & 0x1fffff;
 145         if (inChar == midc) {
 146             goto foundChar;
 147         } else if (inChar < midc) {
 148             limit = mid;
 149         } else {
 150             low = mid;
 151         }
 152     } while (low < limit-1);
 153     mid = low;
 154     midc = *mid & 0x1fffff;
 155     if (inChar != midc) {
 156         // Char not found.  It maps to itself.
 157         int i = 0;
 158         dest.append(inChar);
 159         return i;
 160     }
 161   foundChar:
 162     int32_t keyFlags = *mid & 0xff000000;
 163     if ((keyFlags & tableMask) == 0) {
 164         // We found the right key char, but the entry doesn't pertain to the
 165         //  table we need.  See if there is an adjacent key that does
 166         if (keyFlags & USPOOF_KEY_MULTIPLE_VALUES) {
 167             int32_t *altMid;
 168             for (altMid = mid-1; (*altMid&0x00ffffff) == inChar; altMid--) {
 169                 keyFlags = *altMid & 0xff000000;
 170                 if (keyFlags & tableMask) {
 171                     mid = altMid;
 172                     goto foundKey;
 173                 }
 174             }
 175             for (altMid = mid+1; (*altMid&0x00ffffff) == inChar; altMid++) {
 176                 keyFlags = *altMid & 0xff000000;
 177                 if (keyFlags & tableMask) {
 178                     mid = altMid;
 179                     goto foundKey;
 180                 }
 181             }
 182         }
 183         // No key entry for this char & table.
 184         // The input char maps to itself.
 185         int i = 0;
 186         dest.append(inChar);
 187         return i;
 188     }
 189
 190   foundKey:
 191     int32_t  stringLen = USPOOF_KEY_LENGTH_FIELD(keyFlags) + 1;
 192     int32_t keyTableIndex = (int32_t)(mid - fSpoofData->fCFUKeys);
 193
 194     // Value is either a UChar  (for strings of length 1) or
 195     //                 an index into the string table (for longer strings)
 196     uint16_t value = fSpoofData->fCFUValues[keyTableIndex];
 197     if (stringLen == 1) {
 198         dest.append((UChar)value);
 199         return 1;
 200     }
 201
 202     // String length of 4 from the above lookup is used for all strings of length >= 4.
 203     // For these, get the real length from the string lengths table,
 204     //   which maps string table indexes to lengths.
 205     //   All strings of the same length are stored contiguously in the string table.
 206     //   'value' from the lookup above is the starting index for the desired string.
 207
 208     int32_t ix;
 209     if (stringLen == 4) {
 210         int32_t stringLengthsLimit = fSpoofData->fRawData->fCFUStringLengthsSize;
 211         for (ix = 0; ix < stringLengthsLimit; ix++) {
 212             if (fSpoofData->fCFUStringLengths[ix].fLastString >= value) {
 213                 stringLen = fSpoofData->fCFUStringLengths[ix].fStrLength;
 214                 break;
 215             }
 216         }
 217         U_ASSERT(ix < stringLengthsLimit);
 218     }
 219
 220     U_ASSERT(value + stringLen <= fSpoofData->fRawData->fCFUStringTableLen);
 221     UChar *src = &fSpoofData->fCFUStrings[value];
 222     dest.append(src, stringLen);
 223     return stringLen;
 224 }
 225
 226
 227 //---------------------------------------------------------------------------------------
 228 //
 229 //  wholeScriptCheck()
 230 //
 231 //      Input text is already normalized to NFD
 232 //      Return the set of scripts, each of which can represent something that is
 233 //             confusable with the input text.  The script of the input text
 234 //             is included; input consisting of characters from a single script will
 235 //             always produce a result consisting of a set containing that script.
 236 //
 237 //---------------------------------------------------------------------------------------
 238 void SpoofImpl::wholeScriptCheck(
 239         const UnicodeString &text, ScriptSet *result, UErrorCode &status) const {
 240
 241     UTrie2 *table =
 242         (fChecks & USPOOF_ANY_CASE) ? fSpoofData->fAnyCaseTrie : fSpoofData->fLowerCaseTrie;
 243     result->setAll();
 244     int32_t length = text.length();
 245     for (int32_t inputIdx=0; inputIdx < length;) {
 246         UChar32 c = text.char32At(inputIdx);
 247         inputIdx += U16_LENGTH(c);
 248         uint32_t index = utrie2_get32(table, c);
 249         if (index == 0) {
 250             // No confusables in another script for this char.
 251             // TODO:  we should change the data to have sets with just the single script
 252             //        bit for the script of this char.  Gets rid of this special case.
 253             //        Until then, grab the script from the char and intersect it with the set.
 254             UScriptCode cpScript = uscript_getScript(c, &status);
 255             U_ASSERT(cpScript > USCRIPT_INHERITED);
 256             result->intersect(cpScript, status);
 257         } else if (index == 1) {
 258             // Script == Common or Inherited.  Nothing to do.
 259         } else {
 260             result->intersect(fSpoofData->fScriptSets[index]);
 261         }
 262     }
 263 }
 264
 265
 266 void SpoofImpl::setAllowedLocales(const char *localesList, UErrorCode &status) {
 267     UnicodeSet    allowedChars;
 268     UnicodeSet    *tmpSet = NULL;
 269     const char    *locStart = localesList;
 270     const char    *locEnd = NULL;
 271     const char    *localesListEnd = localesList + uprv_strlen(localesList);
 272     int32_t        localeListCount = 0;   // Number of locales provided by caller.
 273
 274     // Loop runs once per locale from the localesList, a comma separated list of locales.
 275     do {
 276         locEnd = uprv_strchr(locStart, ',');
 277         if (locEnd == NULL) {
 278             locEnd = localesListEnd;
 279         }
 280         while (*locStart == ' ') {
 281             locStart++;
 282         }
 283         const char *trimmedEnd = locEnd-1;
 284         while (trimmedEnd > locStart && *trimmedEnd == ' ') {
 285             trimmedEnd--;
 286         }
 287         if (trimmedEnd <= locStart) {
 288             break;
 289         }
 290         const char *locale = uprv_strndup(locStart, (int32_t)(trimmedEnd + 1 - locStart));
 291         localeListCount++;
 292
 293         // We have one locale from the locales list.
 294         // Add the script chars for this locale to the accumulating set of allowed chars.
 295         // If the locale is no good, we will be notified back via status.
 296         addScriptChars(locale, &allowedChars, status);
 297         uprv_free((void *)locale);
 298         if (U_FAILURE(status)) {
 299             break;
 300         }
 301         locStart = locEnd + 1;
 302     } while (locStart < localesListEnd);
 303
 304     // If our caller provided an empty list of locales, we disable the allowed characters checking
 305     if (localeListCount == 0) {
 306         uprv_free((void *)fAllowedLocales);
 307         fAllowedLocales = uprv_strdup("");
 308         tmpSet = new UnicodeSet(0, 0x10ffff);
 309         if (fAllowedLocales == NULL || tmpSet == NULL) {
 310             status = U_MEMORY_ALLOCATION_ERROR;
 311             return;
 312         }
 313         tmpSet->freeze();
 314         delete fAllowedCharsSet;
 315         fAllowedCharsSet = tmpSet;
 316         fChecks &= ~USPOOF_CHAR_LIMIT;
 317         return;
 318     }
 319
 320
 321     // Add all common and inherited characters to the set of allowed chars.
 322     UnicodeSet tempSet;
 323     tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_COMMON, status);
 324     allowedChars.addAll(tempSet);
 325     tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_INHERITED, status);
 326     allowedChars.addAll(tempSet);
 327
 328     // If anything went wrong, we bail out without changing
 329     // the state of the spoof checker.
 330     if (U_FAILURE(status)) {
 331         return;
 332     }
 333
 334     // Store the updated spoof checker state.
 335     tmpSet = static_cast<UnicodeSet *>(allowedChars.clone());
 336     const char *tmpLocalesList = uprv_strdup(localesList);
 337     if (tmpSet == NULL || tmpLocalesList == NULL) {
 338         status = U_MEMORY_ALLOCATION_ERROR;
 339         return;
 340     }
 341     uprv_free((void *)fAllowedLocales);
 342     fAllowedLocales = tmpLocalesList;
 343     tmpSet->freeze();
 344     delete fAllowedCharsSet;
 345     fAllowedCharsSet = tmpSet;
 346     fChecks |= USPOOF_CHAR_LIMIT;
 347 }
 348
 349
 350 const char * SpoofImpl::getAllowedLocales(UErrorCode &/*status*/) {
 351     return fAllowedLocales;
 352 }
 353
 354
 355 // Given a locale (a language), add all the characters from all of the scripts used with that language
 356 // to the allowedChars UnicodeSet
 357
 358 void SpoofImpl::addScriptChars(const char *locale, UnicodeSet *allowedChars, UErrorCode &status) {
 359     UScriptCode scripts[30];
 360
 361     int32_t numScripts = uscript_getCode(locale, scripts, sizeof(scripts)/sizeof(UScriptCode), &status);
 362     if (U_FAILURE(status)) {
 363         return;
 364     }
 365     if (status == U_USING_DEFAULT_WARNING) {
 366         status = U_ILLEGAL_ARGUMENT_ERROR;
 367         return;
 368     }
 369     UnicodeSet tmpSet;
 370     int32_t    i;
 371     for (i=0; i<numScripts; i++) {
 372         tmpSet.applyIntPropertyValue(UCHAR_SCRIPT, scripts[i], status);
 373         allowedChars->addAll(tmpSet);
 374     }
 375 }
 376
 377
 378 // Convert a text format hex number.  Utility function used by builder code.  Static.
 379 // Input: UChar *string text.  Output: a UChar32
 380 // Input has been pre-checked, and will have no non-hex chars.
 381 // The number must fall in the code point range of 0..0x10ffff
 382 // Static Function.
 383 UChar32 SpoofImpl::ScanHex(const UChar *s, int32_t start, int32_t limit, UErrorCode &status) {
 384     if (U_FAILURE(status)) {
 385         return 0;
 386     }
 387     U_ASSERT(limit-start > 0);
 388     uint32_t val = 0;
 389     int i;
 390     for (i=start; i<limit; i++) {
 391         int digitVal = s[i] - 0x30;
 392         if (digitVal>9) {
 393             digitVal = 0xa + (s[i] - 0x41);  // Upper Case 'A'
 394         }
 395         if (digitVal>15) {
 396             digitVal = 0xa + (s[i] - 0x61);  // Lower Case 'a'
 397         }
 398         U_ASSERT(digitVal <= 0xf);
 399         val <<= 4;
 400         val += digitVal;
 401     }
 402     if (val > 0x10ffff) {
 403         status = U_PARSE_ERROR;
 404         val = 0;
 405     }
 406     return (UChar32)val;
 407 }
 408
 409 // IdentifierInfo Cache. IdentifierInfo objects are somewhat expensive to create.
 410 //                       Maintain a one-element cache, which is sufficient to avoid repeatedly
 411 //                       creating new ones unless we get multi-thread concurrency in spoof
 412 //                       check operations, which should be statistically uncommon.
 413
 414 // These functions are used in place of new & delete of an IdentifierInfo.
 415 // They will recycle the IdentifierInfo when possible.
 416 // They are logically const, and used within const functions that must be thread safe.
 417 IdentifierInfo *SpoofImpl::getIdentifierInfo(UErrorCode &status) const {
 418     IdentifierInfo *returnIdInfo = NULL;
 419     if (U_FAILURE(status)) {
 420         return returnIdInfo;
 421     }
 422     SpoofImpl *nonConstThis = const_cast<SpoofImpl *>(this);
 423     {
 424         Mutex m;
 425         returnIdInfo = nonConstThis->fCachedIdentifierInfo;
 426         nonConstThis->fCachedIdentifierInfo = NULL;
 427     }
 428     if (returnIdInfo == NULL) {
 429         returnIdInfo = new IdentifierInfo(status);
 430         if (U_SUCCESS(status) && returnIdInfo == NULL) {
 431             status = U_MEMORY_ALLOCATION_ERROR;
 432         }
 433         if (U_FAILURE(status) && returnIdInfo != NULL) {
 434             delete returnIdInfo;
 435             returnIdInfo = NULL;
 436         }
 437     }
 438     return returnIdInfo;
 439 }
 440
 441
 442 void SpoofImpl::releaseIdentifierInfo(IdentifierInfo *idInfo) const {
 443     if (idInfo != NULL) {
 444         SpoofImpl *nonConstThis = const_cast<SpoofImpl *>(this);
 445         {
 446             Mutex m;
 447             if (nonConstThis->fCachedIdentifierInfo == NULL) {
 448                 nonConstThis->fCachedIdentifierInfo = idInfo;
 449                 idInfo = NULL;
 450             }
 451         }
 452         delete idInfo;
 453     }
 454 }
 455
 456
 457
 458
 459 //----------------------------------------------------------------------------------------------
 460 //
 461 //   class SpoofData Implementation
 462 //
 463 //----------------------------------------------------------------------------------------------
 464
 465
 466 UBool SpoofData::validateDataVersion(const SpoofDataHeader *rawData, UErrorCode &status) {
 467     if (U_FAILURE(status) ||
 468         rawData == NULL ||
 469         rawData->fMagic != USPOOF_MAGIC ||
 470         rawData->fFormatVersion[0] > 1 ||
 471         rawData->fFormatVersion[1] > 0) {
 472             status = U_INVALID_FORMAT_ERROR;
 473             return FALSE;
 474     }
 475     return TRUE;
 476 }
 477
 478 //
 479 //  SpoofData::getDefault() - return a wrapper around the spoof data that is
 480 //                           baked into the default ICU data.
 481 //
 482 SpoofData *SpoofData::getDefault(UErrorCode &status) {
 483     // TODO:  Cache it.  Lazy create, keep until cleanup.
 484
 485     UDataMemory *udm = udata_open(NULL, "cfu", "confusables", &status);
 486     if (U_FAILURE(status)) {
 487         return NULL;
 488     }
 489     SpoofData *This = new SpoofData(udm, status);
 490     if (U_FAILURE(status)) {
 491         delete This;
 492         return NULL;
 493     }
 494     if (This == NULL) {
 495         status = U_MEMORY_ALLOCATION_ERROR;
 496     }
 497     return This;
 498 }
 499
 500
 501 SpoofData::SpoofData(UDataMemory *udm, UErrorCode &status)
 502 {
 503     reset();
 504     if (U_FAILURE(status)) {
 505         return;
 506     }
 507     fRawData = reinterpret_cast<SpoofDataHeader *>
 508                    ((char *)(udm->pHeader) + udm->pHeader->dataHeader.headerSize);
 509     fUDM = udm;
 510     validateDataVersion(fRawData, status);
 511     initPtrs(status);
 512 }
 513
 514
 515 SpoofData::SpoofData(const void *data, int32_t length, UErrorCode &status)
 516 {
 517     reset();
 518     if (U_FAILURE(status)) {
 519         return;
 520     }
 521     if ((size_t)length < sizeof(SpoofDataHeader)) {
 522         status = U_INVALID_FORMAT_ERROR;
 523         return;
 524     }
 525     void *ncData = const_cast<void *>(data);
 526     fRawData = static_cast<SpoofDataHeader *>(ncData);
 527     if (length < fRawData->fLength) {
 528         status = U_INVALID_FORMAT_ERROR;
 529         return;
 530     }
 531     validateDataVersion(fRawData, status);
 532     initPtrs(status);
 533 }
 534
 535
 536 // Spoof Data constructor for use from data builder.
 537 //   Initializes a new, empty data area that will be populated later.
 538 SpoofData::SpoofData(UErrorCode &status) {
 539     reset();
 540     if (U_FAILURE(status)) {
 541         return;
 542     }
 543     fDataOwned = true;
 544     fRefCount = 1;
 545
 546     // The spoof header should already be sized to be a multiple of 16 bytes.
 547     // Just in case it's not, round it up.
 548     uint32_t initialSize = (sizeof(SpoofDataHeader) + 15) & ~15;
 549     U_ASSERT(initialSize == sizeof(SpoofDataHeader));
 550
 551     fRawData = static_cast<SpoofDataHeader *>(uprv_malloc(initialSize));
 552     fMemLimit = initialSize;
 553     if (fRawData == NULL) {
 554         status = U_MEMORY_ALLOCATION_ERROR;
 555         return;
 556     }
 557     uprv_memset(fRawData, 0, initialSize);
 558
 559     fRawData->fMagic = USPOOF_MAGIC;
 560     fRawData->fFormatVersion[0] = 1;
 561     fRawData->fFormatVersion[1] = 0;
 562     fRawData->fFormatVersion[2] = 0;
 563     fRawData->fFormatVersion[3] = 0;
 564     initPtrs(status);
 565 }
 566
 567 // reset() - initialize all fields.
 568 //           Should be updated if any new fields are added.
 569 //           Called by constructors to put things in a known initial state.
 570 void SpoofData::reset() {
 571    fRawData = NULL;
 572    fDataOwned = FALSE;
 573    fUDM      = NULL;
 574    fMemLimit = 0;
 575    fRefCount = 1;
 576    fCFUKeys = NULL;
 577    fCFUValues = NULL;
 578    fCFUStringLengths = NULL;
 579    fCFUStrings = NULL;
 580    fAnyCaseTrie = NULL;
 581    fLowerCaseTrie = NULL;
 582    fScriptSets = NULL;
 583 }
 584
 585
 586 //  SpoofData::initPtrs()
 587 //            Initialize the pointers to the various sections of the raw data.
 588 //
 589 //            This function is used both during the Trie building process (multiple
 590 //            times, as the individual data sections are added), and
 591 //            during the opening of a Spoof Checker from prebuilt data.
 592 //
 593 //            The pointers for non-existent data sections (identified by an offset of 0)
 594 //            are set to NULL.
 595 //
 596 //            Note:  During building the data, adding each new data section
 597 //            reallocs the raw data area, which likely relocates it, which
 598 //            in turn requires reinitializing all of the pointers into it, hence
 599 //            multiple calls to this function during building.
 600 //
 601 void SpoofData::initPtrs(UErrorCode &status) {
 602     fCFUKeys = NULL;
 603     fCFUValues = NULL;
 604     fCFUStringLengths = NULL;
 605     fCFUStrings = NULL;
 606     if (U_FAILURE(status)) {
 607         return;
 608     }
 609     if (fRawData->fCFUKeys != 0) {
 610         fCFUKeys = (int32_t *)((char *)fRawData + fRawData->fCFUKeys);
 611     }
 612     if (fRawData->fCFUStringIndex != 0) {
 613         fCFUValues = (uint16_t *)((char *)fRawData + fRawData->fCFUStringIndex);
 614     }
 615     if (fRawData->fCFUStringLengths != 0) {
 616         fCFUStringLengths = (SpoofStringLengthsElement *)((char *)fRawData + fRawData->fCFUStringLengths);
 617     }
 618     if (fRawData->fCFUStringTable != 0) {
 619         fCFUStrings = (UChar *)((char *)fRawData + fRawData->fCFUStringTable);
 620     }
 621
 622     if (fAnyCaseTrie ==  NULL && fRawData->fAnyCaseTrie != 0) {
 623         fAnyCaseTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
 624             (char *)fRawData + fRawData->fAnyCaseTrie, fRawData->fAnyCaseTrieLength, NULL, &status);
 625     }
 626     if (fLowerCaseTrie ==  NULL && fRawData->fLowerCaseTrie != 0) {
 627         fLowerCaseTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
 628             (char *)fRawData + fRawData->fLowerCaseTrie, fRawData->fLowerCaseTrieLength, NULL, &status);
 629     }
 630
 631     if (fRawData->fScriptSets != 0) {
 632         fScriptSets = (ScriptSet *)((char *)fRawData + fRawData->fScriptSets);
 633     }
 634 }
 635
 636
 637 SpoofData::~SpoofData() {
 638     utrie2_close(fAnyCaseTrie);
 639     fAnyCaseTrie = NULL;
 640     utrie2_close(fLowerCaseTrie);
 641     fLowerCaseTrie = NULL;
 642     if (fDataOwned) {
 643         uprv_free(fRawData);
 644     }
 645     fRawData = NULL;
 646     if (fUDM != NULL) {
 647         udata_close(fUDM);
 648     }
 649     fUDM = NULL;
 650 }
 651
 652
 653 void SpoofData::removeReference() {
 654     if (umtx_atomic_dec(&fRefCount) == 0) {
 655         delete this;
 656     }
 657 }
 658
 659
 660 SpoofData *SpoofData::addReference() {
 661     umtx_atomic_inc(&fRefCount);
 662     return this;
 663 }
 664
 665
 666 void *SpoofData::reserveSpace(int32_t numBytes,  UErrorCode &status) {
 667     if (U_FAILURE(status)) {
 668         return NULL;
 669     }
 670     if (!fDataOwned) {
 671         U_ASSERT(FALSE);
 672         status = U_INTERNAL_PROGRAM_ERROR;
 673         return NULL;
 674     }
 675
 676     numBytes = (numBytes + 15) & ~15;   // Round up to a multiple of 16
 677     uint32_t returnOffset = fMemLimit;
 678     fMemLimit += numBytes;
 679     fRawData = static_cast<SpoofDataHeader *>(uprv_realloc(fRawData, fMemLimit));
 680     fRawData->fLength = fMemLimit;
 681     uprv_memset((char *)fRawData + returnOffset, 0, numBytes);
 682     initPtrs(status);
 683     return (char *)fRawData + returnOffset;
 684 }
 685
 686
 687 U_NAMESPACE_END
 688
 689 U_NAMESPACE_USE
 690
 691 //-----------------------------------------------------------------------------
 692 //
 693 //  uspoof_swap   -  byte swap and char encoding swap of spoof data
 694 //
 695 //-----------------------------------------------------------------------------
 696 U_CAPI int32_t U_EXPORT2
 697 uspoof_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
 698            UErrorCode *status) {
 699
 700     if (status == NULL || U_FAILURE(*status)) {
 701         return 0;
 702     }
 703     if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
 704         *status=U_ILLEGAL_ARGUMENT_ERROR;
 705         return 0;
 706     }
 707
 708     //
 709     //  Check that the data header is for spoof data.
 710     //    (Header contents are defined in gencfu.cpp)
 711     //
 712     const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4);
 713     if(!(  pInfo->dataFormat[0]==0x43 &&   /* dataFormat="Cfu " */
 714            pInfo->dataFormat[1]==0x66 &&
 715            pInfo->dataFormat[2]==0x75 &&
 716            pInfo->dataFormat[3]==0x20 &&
 717            pInfo->formatVersion[0]==1  )) {
 718         udata_printError(ds, "uspoof_swap(): data format %02x.%02x.%02x.%02x "
 719                              "(format version %02x %02x %02x %02x) is not recognized\n",
 720                          pInfo->dataFormat[0], pInfo->dataFormat[1],
 721                          pInfo->dataFormat[2], pInfo->dataFormat[3],
 722                          pInfo->formatVersion[0], pInfo->formatVersion[1],
 723                          pInfo->formatVersion[2], pInfo->formatVersion[3]);
 724         *status=U_UNSUPPORTED_ERROR;
 725         return 0;
 726     }
 727
 728     //
 729     // Swap the data header.  (This is the generic ICU Data Header, not the uspoof Specific
 730     //                         header).  This swap also conveniently gets us
 731     //                         the size of the ICU d.h., which lets us locate the start
 732     //                         of the uspoof specific data.
 733     //
 734     int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
 735
 736
 737     //
 738     // Get the Spoof Data Header, and check that it appears to be OK.
 739     //
 740     //
 741     const uint8_t   *inBytes =(const uint8_t *)inData+headerSize;
 742     SpoofDataHeader *spoofDH = (SpoofDataHeader *)inBytes;
 743     if (ds->readUInt32(spoofDH->fMagic)   != USPOOF_MAGIC ||
 744         ds->readUInt32(spoofDH->fLength)  <  sizeof(SpoofDataHeader))
 745     {
 746         udata_printError(ds, "uspoof_swap(): Spoof Data header is invalid.\n");
 747         *status=U_UNSUPPORTED_ERROR;
 748         return 0;
 749     }
 750
 751     //
 752     // Prefight operation?  Just return the size
 753     //
 754     int32_t spoofDataLength = ds->readUInt32(spoofDH->fLength);
 755     int32_t totalSize = headerSize + spoofDataLength;
 756     if (length < 0) {
 757         return totalSize;
 758     }
 759
 760     //
 761     // Check that length passed in is consistent with length from Spoof data header.
 762     //
 763     if (length < totalSize) {
 764         udata_printError(ds, "uspoof_swap(): too few bytes (%d after ICU Data header) for spoof data.\n",
 765                             spoofDataLength);
 766         *status=U_INDEX_OUTOFBOUNDS_ERROR;
 767         return 0;
 768         }
 769
 770
 771     //
 772     // Swap the Data.  Do the data itself first, then the Spoof Data Header, because
 773     //                 we need to reference the header to locate the data, and an
 774     //                 inplace swap of the header leaves it unusable.
 775     //
 776     uint8_t          *outBytes = (uint8_t *)outData + headerSize;
 777     SpoofDataHeader  *outputDH = (SpoofDataHeader *)outBytes;
 778
 779     int32_t   sectionStart;
 780     int32_t   sectionLength;
 781
 782     //
 783     // If not swapping in place, zero out the output buffer before starting.
 784     //    Gaps may exist between the individual sections, and these must be zeroed in
 785     //    the output buffer.  The simplest way to do that is to just zero the whole thing.
 786     //
 787     if (inBytes != outBytes) {
 788         uprv_memset(outBytes, 0, spoofDataLength);
 789     }
 790
 791     // Confusables Keys Section   (fCFUKeys)
 792     sectionStart  = ds->readUInt32(spoofDH->fCFUKeys);
 793     sectionLength = ds->readUInt32(spoofDH->fCFUKeysSize) * 4;
 794     ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
 795
 796     // String Index Section
 797     sectionStart  = ds->readUInt32(spoofDH->fCFUStringIndex);
 798     sectionLength = ds->readUInt32(spoofDH->fCFUStringIndexSize) * 2;
 799     ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
 800
 801     // String Table Section
 802     sectionStart  = ds->readUInt32(spoofDH->fCFUStringTable);
 803     sectionLength = ds->readUInt32(spoofDH->fCFUStringTableLen) * 2;
 804     ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
 805
 806     // String Lengths Section
 807     sectionStart  = ds->readUInt32(spoofDH->fCFUStringLengths);
 808     sectionLength = ds->readUInt32(spoofDH->fCFUStringLengthsSize) * 4;
 809     ds->swapArray16(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
 810
 811     // Any Case Trie
 812     sectionStart  = ds->readUInt32(spoofDH->fAnyCaseTrie);
 813     sectionLength = ds->readUInt32(spoofDH->fAnyCaseTrieLength);
 814     utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
 815
 816     // Lower Case Trie
 817     sectionStart  = ds->readUInt32(spoofDH->fLowerCaseTrie);
 818     sectionLength = ds->readUInt32(spoofDH->fLowerCaseTrieLength);
 819     utrie2_swap(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
 820
 821     // Script Sets.  The data is an array of int32_t
 822     sectionStart  = ds->readUInt32(spoofDH->fScriptSets);
 823     sectionLength = ds->readUInt32(spoofDH->fScriptSetsLength) * sizeof(ScriptSet);
 824     ds->swapArray32(ds, inBytes+sectionStart, sectionLength, outBytes+sectionStart, status);
 825
 826     // And, last, swap the header itself.
 827     //   int32_t   fMagic             // swap this
 828     //   uint8_t   fFormatVersion[4]  // Do not swap this, just copy
 829     //   int32_t   fLength and all the rest       // Swap the rest, all is 32 bit stuff.
 830     //
 831     uint32_t magic = ds->readUInt32(spoofDH->fMagic);
 832     ds->writeUInt32((uint32_t *)&outputDH->fMagic, magic);
 833
 834     if (outputDH->fFormatVersion != spoofDH->fFormatVersion) {
 835         uprv_memcpy(outputDH->fFormatVersion, spoofDH->fFormatVersion, sizeof(spoofDH->fFormatVersion));
 836     }
 837     // swap starting at fLength
 838     ds->swapArray32(ds, &spoofDH->fLength, sizeof(SpoofDataHeader)-8 /* minus magic and fFormatVersion[4] */, &outputDH->fLength, status);
 839
 840     return totalSize;
 841 }
 842
 843 #endif
 844
 845