icuSources/common/usprep.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4  *******************************************************************************
   5  *
   6  *   Copyright (C) 2003-2016, International Business Machines
   7  *   Corporation and others.  All Rights Reserved.
   8  *
   9  *******************************************************************************
  10  *   file name:  usprep.cpp
  11  *   encoding:   UTF-8
  12  *   tab size:   8 (not used)
  13  *   indentation:4
  14  *
  15  *   created on: 2003jul2
  16  *   created by: Ram Viswanadha
  17  */
  18
  19 #include "unicode/utypes.h"
  20
  21 #if !UCONFIG_NO_IDNA
  22
  23 #include "unicode/usprep.h"
  24
  25 #include "unicode/normalizer2.h"
  26 #include "unicode/ustring.h"
  27 #include "unicode/uchar.h"
  28 #include "unicode/uversion.h"
  29 #include "umutex.h"
  30 #include "cmemory.h"
  31 #include "sprpimpl.h"
  32 #include "ustr_imp.h"
  33 #include "uhash.h"
  34 #include "cstring.h"
  35 #include "udataswp.h"
  36 #include "ucln_cmn.h"
  37 #include "ubidi_props.h"
  38 #include "uprops.h"
  39
  40 U_NAMESPACE_USE
  41
  42 U_CDECL_BEGIN
  43
/*
 * Static cache for already opened StringPrep profiles.
 * Keys are UStringPrepKey (name + path, see hashEntry()/compareEntries()),
 * values are UStringPrepProfile pointers. Created lazily by createCache().
 */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
/* one-time initialization state for SHARED_DATA_HASHTABLE (see initCache()) */
static icu::UInitOnce gSharedDataInitOnce;

/* held while the cache is read or modified and while refCount is changed */
static UMutex usprepMutex = U_MUTEX_INITIALIZER;

/* format version of spp file */
//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };

/*
 * the Unicode version of the sprep data;
 * written by isSPrepAcceptable() and read by loadData()
 */
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
  57
/*
 * Profile names must be aligned to UStringPrepProfileType:
 * usprep_openByType() uses the enum value directly as an index into this table.
 * Several entries intentionally share the "rfc3491" data name.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
  75
  76 static UBool U_CALLCONV
  77 isSPrepAcceptable(void * /* context */,
  78              const char * /* type */,
  79              const char * /* name */,
  80              const UDataInfo *pInfo) {
  81     if(
  82         pInfo->size>=20 &&
  83         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
  84         pInfo->charsetFamily==U_CHARSET_FAMILY &&
  85         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
  86         pInfo->dataFormat[1]==0x50 &&
  87         pInfo->dataFormat[2]==0x52 &&
  88         pInfo->dataFormat[3]==0x50 &&
  89         pInfo->formatVersion[0]==3 &&
  90         pInfo->formatVersion[2]==UTRIE_SHIFT &&
  91         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
  92     ) {
  93         //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
  94         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
  95         return TRUE;
  96     } else {
  97         return FALSE;
  98     }
  99 }
 100
 101 static int32_t U_CALLCONV
 102 getSPrepFoldingOffset(uint32_t data) {
 103
 104     return (int32_t)data;
 105
 106 }
 107
 108 /* hashes an entry  */
 109 static int32_t U_CALLCONV
 110 hashEntry(const UHashTok parm) {
 111     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
 112     UHashTok namekey, pathkey;
 113     namekey.pointer = b->name;
 114     pathkey.pointer = b->path;
 115     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
 116 }
 117
 118 /* compares two entries */
 119 static UBool U_CALLCONV
 120 compareEntries(const UHashTok p1, const UHashTok p2) {
 121     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
 122     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
 123     UHashTok name1, name2, path1, path2;
 124     name1.pointer = b1->name;
 125     name2.pointer = b2->name;
 126     path1.pointer = b1->path;
 127     path2.pointer = b2->path;
 128     return ((UBool)(uhash_compareChars(name1, name2) &
 129         uhash_compareChars(path1, path2)));
 130 }
 131
 132 static void
 133 usprep_unload(UStringPrepProfile* data){
 134     udata_close(data->sprepData);
 135 }
 136
 137 static int32_t
 138 usprep_internal_flushCache(UBool noRefCount){
 139     UStringPrepProfile *profile = NULL;
 140     UStringPrepKey  *key  = NULL;
 141     int32_t pos = UHASH_FIRST;
 142     int32_t deletedNum = 0;
 143     const UHashElement *e;
 144
 145     /*
 146      * if shared data hasn't even been lazy evaluated yet
 147      * return 0
 148      */
 149     umtx_lock(&usprepMutex);
 150     if (SHARED_DATA_HASHTABLE == NULL) {
 151         umtx_unlock(&usprepMutex);
 152         return 0;
 153     }
 154
 155     /*creates an enumeration to iterate through every element in the table */
 156     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
 157     {
 158         profile = (UStringPrepProfile *) e->value.pointer;
 159         key  = (UStringPrepKey *) e->key.pointer;
 160
 161         if ((noRefCount== FALSE && profile->refCount == 0) ||
 162              noRefCount== TRUE) {
 163             deletedNum++;
 164             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
 165
 166             /* unload the data */
 167             usprep_unload(profile);
 168
 169             if(key->name != NULL) {
 170                 uprv_free(key->name);
 171                 key->name=NULL;
 172             }
 173             if(key->path != NULL) {
 174                 uprv_free(key->path);
 175                 key->path=NULL;
 176             }
 177             uprv_free(profile);
 178             uprv_free(key);
 179         }
 180
 181     }
 182     umtx_unlock(&usprepMutex);
 183
 184     return deletedNum;
 185 }
 186
 187 /* Works just like ucnv_flushCache()
 188 static int32_t
 189 usprep_flushCache(){
 190     return usprep_internal_flushCache(FALSE);
 191 }
 192 */
 193
 194 static UBool U_CALLCONV usprep_cleanup(void){
 195     if (SHARED_DATA_HASHTABLE != NULL) {
 196         usprep_internal_flushCache(TRUE);
 197         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
 198             uhash_close(SHARED_DATA_HASHTABLE);
 199             SHARED_DATA_HASHTABLE = NULL;
 200         }
 201     }
 202     gSharedDataInitOnce.reset();
 203     return (SHARED_DATA_HASHTABLE == NULL);
 204 }
 205 U_CDECL_END
 206
 207
 208 /** Initializes the cache for resources */
 209 static void U_CALLCONV
 210 createCache(UErrorCode &status) {
 211     SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
 212     if (U_FAILURE(status)) {
 213         SHARED_DATA_HASHTABLE = NULL;
 214     }
 215     ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
 216 }
 217
 218 static void
 219 initCache(UErrorCode *status) {
 220     umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
 221 }
 222
 223 static UBool U_CALLCONV
 224 loadData(UStringPrepProfile* profile,
 225          const char* path,
 226          const char* name,
 227          const char* type,
 228          UErrorCode* errorCode) {
 229     /* load Unicode SPREP data from file */
 230     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
 231     UDataMemory *dataMemory;
 232     const int32_t *p=NULL;
 233     const uint8_t *pb;
 234     UVersionInfo normUnicodeVersion;
 235     int32_t normUniVer, sprepUniVer, normCorrVer;
 236
 237     if(errorCode==NULL || U_FAILURE(*errorCode)) {
 238         return 0;
 239     }
 240
 241     /* open the data outside the mutex block */
 242     //TODO: change the path
 243     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
 244     if(U_FAILURE(*errorCode)) {
 245         return FALSE;
 246     }
 247
 248     p=(const int32_t *)udata_getMemory(dataMemory);
 249     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
 250     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
 251     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
 252
 253
 254     if(U_FAILURE(*errorCode)) {
 255         udata_close(dataMemory);
 256         return FALSE;
 257     }
 258
 259     /* in the mutex block, set the data for this process */
 260     umtx_lock(&usprepMutex);
 261     if(profile->sprepData==NULL) {
 262         profile->sprepData=dataMemory;
 263         dataMemory=NULL;
 264         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
 265         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
 266     } else {
 267         p=(const int32_t *)udata_getMemory(profile->sprepData);
 268     }
 269     umtx_unlock(&usprepMutex);
 270     /* initialize some variables */
 271     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
 272
 273     u_getUnicodeVersion(normUnicodeVersion);
 274     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
 275                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
 276     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
 277                   (dataVersion[2] << 8 ) + (dataVersion[3]);
 278     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
 279
 280     if(U_FAILURE(*errorCode)){
 281         udata_close(dataMemory);
 282         return FALSE;
 283     }
 284     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
 285         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
 286         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
 287       ){
 288         *errorCode = U_INVALID_FORMAT_ERROR;
 289         udata_close(dataMemory);
 290         return FALSE;
 291     }
 292     profile->isDataLoaded = TRUE;
 293
 294     /* if a different thread set it first, then close the extra data */
 295     if(dataMemory!=NULL) {
 296         udata_close(dataMemory); /* NULL if it was set correctly */
 297     }
 298
 299
 300     return profile->isDataLoaded;
 301 }
 302
 303 static UStringPrepProfile*
 304 usprep_getProfile(const char* path,
 305                   const char* name,
 306                   UErrorCode *status){
 307
 308     UStringPrepProfile* profile = NULL;
 309
 310     initCache(status);
 311
 312     if(U_FAILURE(*status)){
 313         return NULL;
 314     }
 315
 316     UStringPrepKey stackKey;
 317     /*
 318      * const is cast way to save malloc, strcpy and free calls
 319      * we use the passed in pointers for fetching the data from the
 320      * hash table which is safe
 321      */
 322     stackKey.name = (char*) name;
 323     stackKey.path = (char*) path;
 324
 325     /* fetch the data from the cache */
 326     umtx_lock(&usprepMutex);
 327     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
 328     if(profile != NULL) {
 329         profile->refCount++;
 330     }
 331     umtx_unlock(&usprepMutex);
 332
 333     if(profile == NULL) {
 334         /* else load the data and put the data in the cache */
 335         LocalMemory<UStringPrepProfile> newProfile;
 336         if(newProfile.allocateInsteadAndReset() == NULL) {
 337             *status = U_MEMORY_ALLOCATION_ERROR;
 338             return NULL;
 339         }
 340
 341         /* load the data */
 342         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
 343             return NULL;
 344         }
 345
 346         /* get the options */
 347         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
 348         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
 349
 350         LocalMemory<UStringPrepKey> key;
 351         LocalMemory<char> keyName;
 352         LocalMemory<char> keyPath;
 353         if( key.allocateInsteadAndReset() == NULL ||
 354             keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
 355             (path != NULL &&
 356              keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
 357          ) {
 358             *status = U_MEMORY_ALLOCATION_ERROR;
 359             usprep_unload(newProfile.getAlias());
 360             return NULL;
 361         }
 362
 363         umtx_lock(&usprepMutex);
 364         // If another thread already inserted the same key/value, refcount and cleanup our thread data
 365         profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
 366         if(profile != NULL) {
 367             profile->refCount++;
 368             usprep_unload(newProfile.getAlias());
 369         }
 370         else {
 371             /* initialize the key members */
 372             key->name = keyName.orphan();
 373             uprv_strcpy(key->name, name);
 374             if(path != NULL){
 375                 key->path = keyPath.orphan();
 376                 uprv_strcpy(key->path, path);
 377             }
 378             profile = newProfile.orphan();
 379
 380             /* add the data object to the cache */
 381             profile->refCount = 1;
 382             uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
 383         }
 384         umtx_unlock(&usprepMutex);
 385     }
 386
 387     return profile;
 388 }
 389
 390 U_CAPI UStringPrepProfile* U_EXPORT2
 391 usprep_open(const char* path,
 392             const char* name,
 393             UErrorCode* status){
 394
 395     if(status == NULL || U_FAILURE(*status)){
 396         return NULL;
 397     }
 398
 399     /* initialize the profile struct members */
 400     return usprep_getProfile(path,name,status);
 401 }
 402
 403 U_CAPI UStringPrepProfile* U_EXPORT2
 404 usprep_openByType(UStringPrepProfileType type,
 405                                   UErrorCode* status) {
 406     if(status == NULL || U_FAILURE(*status)){
 407         return NULL;
 408     }
 409     int32_t index = (int32_t)type;
 410     if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
 411         *status = U_ILLEGAL_ARGUMENT_ERROR;
 412         return NULL;
 413     }
 414     return usprep_open(NULL, PROFILE_NAMES[index], status);
 415 }
 416
 417 U_CAPI void U_EXPORT2
 418 usprep_close(UStringPrepProfile* profile){
 419     if(profile==NULL){
 420         return;
 421     }
 422
 423     umtx_lock(&usprepMutex);
 424     /* decrement the ref count*/
 425     if(profile->refCount > 0){
 426         profile->refCount--;
 427     }
 428     umtx_unlock(&usprepMutex);
 429
 430 }
 431
 432 U_CFUNC void
 433 uprv_syntaxError(const UChar* rules,
 434                  int32_t pos,
 435                  int32_t rulesLen,
 436                  UParseError* parseError){
 437     if(parseError == NULL){
 438         return;
 439     }
 440     parseError->offset = pos;
 441     parseError->line = 0 ; // we are not using line numbers
 442
 443     // for pre-context
 444     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
 445     int32_t limit = pos;
 446
 447     u_memcpy(parseError->preContext,rules+start,limit-start);
 448     //null terminate the buffer
 449     parseError->preContext[limit-start] = 0;
 450
 451     // for post-context; include error rules[pos]
 452     start = pos;
 453     limit = start + (U_PARSE_CONTEXT_LEN-1);
 454     if (limit > rulesLen) {
 455         limit = rulesLen;
 456     }
 457     if (start < rulesLen) {
 458         u_memcpy(parseError->postContext,rules+start,limit-start);
 459     }
 460     //null terminate the buffer
 461     parseError->postContext[limit-start]= 0;
 462 }
 463
 464
 465 static inline UStringPrepType
 466 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
 467
 468     UStringPrepType type;
 469     if(trieWord == 0){
 470         /*
 471          * Initial value stored in the mapping table
 472          * just return USPREP_TYPE_LIMIT .. so that
 473          * the source codepoint is copied to the destination
 474          */
 475         type = USPREP_TYPE_LIMIT;
 476         isIndex =FALSE;
 477         value = 0;
 478     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
 479         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
 480         isIndex =FALSE;
 481         value = 0;
 482     }else{
 483         /* get the type */
 484         type = USPREP_MAP;
 485         /* ascertain if the value is index or delta */
 486         if(trieWord & 0x02){
 487             isIndex = TRUE;
 488             value = trieWord  >> 2; //mask off the lower 2 bits and shift
 489         }else{
 490             isIndex = FALSE;
 491             value = (int16_t)trieWord;
 492             value =  (value >> 2);
 493         }
 494
 495         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
 496             type = USPREP_DELETE;
 497             isIndex =FALSE;
 498             value = 0;
 499         }
 500     }
 501     return type;
 502 }
 503
// TODO: change to writing to UnicodeString not UChar *
/*
 * Mapping step of StringPrep: walks src code point by code point, looks each
 * one up in the profile's trie and writes the mapped text to dest.
 *
 * Output beyond destCapacity is counted but not written (pre-flighting).
 *
 * @param profile      profile whose trie and mapping table are used
 * @param src          input text
 * @param srcLength    number of code units in src (must already be known)
 * @param dest         output buffer
 * @param destCapacity capacity of dest in code units
 * @param options      if USPREP_ALLOW_UNASSIGNED is set, unassigned code
 *                     points are not treated as an error
 * @param parseError   receives the error context for an unassigned code point
 * @param status       set to U_STRINGPREP_UNASSIGNED_ERROR for a disallowed
 *                     unassigned code point; otherwise set by u_terminateUChars()
 * @return 0 on the unassigned error, otherwise the result of
 *         u_terminateUChars() for the accumulated output length
 */
static int32_t
usprep_map(  const UStringPrepProfile* profile,
             const UChar* src, int32_t srcLength,
             UChar* dest, int32_t destCapacity,
             int32_t options,
             UParseError* parseError,
             UErrorCode* status ){

    uint16_t result;
    int32_t destIndex=0;
    int32_t srcIndex;
    UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
    UStringPrepType type;
    int16_t value;
    UBool isIndex;
    const int32_t* indexes = profile->indexes;

    // no error checking the caller check for error and arguments
    // no string length check the caller finds out the string length

    for(srcIndex=0;srcIndex<srcLength;){
        UChar32 ch;

        // read the next code point; srcIndex advances past it
        U16_NEXT(src,srcIndex,srcLength,ch);

        result=0;

        UTRIE_GET16(&profile->sprepTrie,ch,result);

        type = getValues(result, value, isIndex);

        // check if the source codepoint is unassigned
        if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){

            // report the position of the start of the offending code point
            uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
            *status = U_STRINGPREP_UNASSIGNED_ERROR;
            return 0;

        }else if(type == USPREP_MAP){

            int32_t index, length;

            if(isIndex){
                // value is an index into mappingData; the index range it falls
                // into determines the length of the mapping
                index = value;
                if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
                         index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
                    length = 1;
                }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
                         index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
                    length = 2;
                }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
                         index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
                    length = 3;
                }else{
                    // otherwise the length is stored in front of the mapping
                    length = profile->mappingData[index++];

                }

                /* copy mapping to destination */
                for(int32_t i=0; i< length; i++){
                    if(destIndex < destCapacity  ){
                        dest[destIndex] = profile->mappingData[index+i];
                    }
                    destIndex++; /* for pre-flighting */
                }
                continue;
            }else{
                // subtract the delta to arrive at the code point
                ch -= value;
            }

        }else if(type==USPREP_DELETE){
             // just consume the codepoint and continue
            continue;
        }
        //copy the code point into destination
        if(ch <= 0xFFFF){
            if(destIndex < destCapacity ){
                dest[destIndex] = (UChar)ch;
            }
            destIndex++;
        }else{
            // supplementary code point: written as a surrogate pair
            if(destIndex+1 < destCapacity ){
                dest[destIndex]   = U16_LEAD(ch);
                dest[destIndex+1] = U16_TRAIL(ch);
            }
            destIndex +=2;
        }

    }

    return u_terminateUChars(dest, destCapacity, destIndex, status);
}
 598
 599 /*
 600    1) Map -- For each character in the input, check if it has a mapping
 601       and, if so, replace it with its mapping.
 602
 603    2) Normalize -- Possibly normalize the result of step 1 using Unicode
 604       normalization.
 605
 606    3) Prohibit -- Check for any characters that are not allowed in the
 607       output.  If any are found, return an error.
 608
 609    4) Check bidi -- Possibly check for right-to-left characters, and if
 610       any are found, make sure that the whole string satisfies the
 611       requirements for bidirectional strings.  If the string does not
 612       satisfy the requirements for bidirectional strings, return an
 613       error.
 614       [Unicode3.2] defines several bidirectional categories; each character
 615        has one bidirectional category assigned to it.  For the purposes of
 616        the requirements below, an "RandALCat character" is a character that
 617        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
       is a character that has Unicode bidirectional category "L".  Note
       that there are many characters which fall in neither of the above
 622        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
 623        this because they have bidirectional category "EN".
 624
 625        In any profile that specifies bidirectional character handling, all
 626        three of the following requirements MUST be met:
 627
 628        1) The characters in section 5.8 MUST be prohibited.
 629
 630        2) If a string contains any RandALCat character, the string MUST NOT
 631           contain any LCat character.
 632
 633        3) If a string contains any RandALCat character, a RandALCat
 634           character MUST be the first character of the string, and a
 635           RandALCat character MUST be the last character of the string.
 636 */
 637 U_CAPI int32_t U_EXPORT2
 638 usprep_prepare(   const UStringPrepProfile* profile,
 639                   const UChar* src, int32_t srcLength,
 640                   UChar* dest, int32_t destCapacity,
 641                   int32_t options,
 642                   UParseError* parseError,
 643                   UErrorCode* status ){
 644
 645     // check error status
 646     if(U_FAILURE(*status)){
 647         return 0;
 648     }
 649
 650     //check arguments
 651     if(profile==NULL ||
 652             (src==NULL ? srcLength!=0 : srcLength<-1) ||
 653             (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
 654         *status=U_ILLEGAL_ARGUMENT_ERROR;
 655         return 0;
 656     }
 657
 658     //get the string length
 659     if(srcLength < 0){
 660         srcLength = u_strlen(src);
 661     }
 662     // map
 663     UnicodeString s1;
 664     UChar *b1 = s1.getBuffer(srcLength);
 665     if(b1==NULL){
 666         *status = U_MEMORY_ALLOCATION_ERROR;
 667         return 0;
 668     }
 669     int32_t b1Len = usprep_map(profile, src, srcLength,
 670                                b1, s1.getCapacity(), options, parseError, status);
 671     s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
 672
 673     if(*status == U_BUFFER_OVERFLOW_ERROR){
 674         // redo processing of string
 675         /* we do not have enough room so grow the buffer*/
 676         b1 = s1.getBuffer(b1Len);
 677         if(b1==NULL){
 678             *status = U_MEMORY_ALLOCATION_ERROR;
 679             return 0;
 680         }
 681
 682         *status = U_ZERO_ERROR; // reset error
 683         b1Len = usprep_map(profile, src, srcLength,
 684                            b1, s1.getCapacity(), options, parseError, status);
 685         s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
 686     }
 687     if(U_FAILURE(*status)){
 688         return 0;
 689     }
 690
 691     // normalize
 692     UnicodeString s2;
 693     if(profile->doNFKC){
 694         const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
 695         FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
 696         if(U_FAILURE(*status)){
 697             return 0;
 698         }
 699         fn2.normalize(s1, s2, *status);
 700     }else{
 701         s2.fastCopyFrom(s1);
 702     }
 703     if(U_FAILURE(*status)){
 704         return 0;
 705     }
 706
 707     // Prohibit and checkBiDi in one pass
 708     const UChar *b2 = s2.getBuffer();
 709     int32_t b2Len = s2.length();
 710     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
 711     UBool leftToRight=FALSE, rightToLeft=FALSE;
 712     int32_t rtlPos =-1, ltrPos =-1;
 713
 714     for(int32_t b2Index=0; b2Index<b2Len;){
 715         UChar32 ch = 0;
 716         U16_NEXT(b2, b2Index, b2Len, ch);
 717
 718         uint16_t result;
 719         UTRIE_GET16(&profile->sprepTrie,ch,result);
 720
 721         int16_t value;
 722         UBool isIndex;
 723         UStringPrepType type = getValues(result, value, isIndex);
 724
 725         if( type == USPREP_PROHIBITED ||
 726             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
 727            ){
 728             *status = U_STRINGPREP_PROHIBITED_ERROR;
 729             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
 730             return 0;
 731         }
 732
 733         if(profile->checkBiDi) {
 734             direction = ubidi_getClass(ch);
 735             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
 736                 firstCharDir = direction;
 737             }
 738             if(direction == U_LEFT_TO_RIGHT){
 739                 leftToRight = TRUE;
 740                 ltrPos = b2Index-1;
 741             }
 742             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
 743                 rightToLeft = TRUE;
 744                 rtlPos = b2Index-1;
 745             }
 746         }
 747     }
 748     if(profile->checkBiDi == TRUE){
 749         // satisfy 2
 750         if( leftToRight == TRUE && rightToLeft == TRUE){
 751             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
 752             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
 753             return 0;
 754         }
 755
 756         //satisfy 3
 757         if( rightToLeft == TRUE &&
 758             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
 759               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
 760            ){
 761             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
 762             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
 763             return FALSE;
 764         }
 765     }
 766     return s2.extract(dest, destCapacity, *status);
 767 }
 768
 769
 770 /* data swapping ------------------------------------------------------------ */
 771
 772 U_CAPI int32_t U_EXPORT2
 773 usprep_swap(const UDataSwapper *ds,
 774             const void *inData, int32_t length, void *outData,
 775             UErrorCode *pErrorCode) {
 776     const UDataInfo *pInfo;
 777     int32_t headerSize;
 778
 779     const uint8_t *inBytes;
 780     uint8_t *outBytes;
 781
 782     const int32_t *inIndexes;
 783     int32_t indexes[16];
 784
 785     int32_t i, offset, count, size;
 786
 787     /* udata_swapDataHeader checks the arguments */
 788     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
 789     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
 790         return 0;
 791     }
 792
 793     /* check data format and format version */
 794     pInfo=(const UDataInfo *)((const char *)inData+4);
 795     if(!(
 796         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
 797         pInfo->dataFormat[1]==0x50 &&
 798         pInfo->dataFormat[2]==0x52 &&
 799         pInfo->dataFormat[3]==0x50 &&
 800         pInfo->formatVersion[0]==3
 801     )) {
 802         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
 803                          pInfo->dataFormat[0], pInfo->dataFormat[1],
 804                          pInfo->dataFormat[2], pInfo->dataFormat[3],
 805                          pInfo->formatVersion[0]);
 806         *pErrorCode=U_UNSUPPORTED_ERROR;
 807         return 0;
 808     }
 809
 810     inBytes=(const uint8_t *)inData+headerSize;
 811     outBytes=(uint8_t *)outData+headerSize;
 812
 813     inIndexes=(const int32_t *)inBytes;
 814
 815     if(length>=0) {
 816         length-=headerSize;
 817         if(length<16*4) {
 818             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
 819                              length);
 820             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 821             return 0;
 822         }
 823     }
 824
 825     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
 826     for(i=0; i<16; ++i) {
 827         indexes[i]=udata_readInt32(ds, inIndexes[i]);
 828     }
 829
 830     /* calculate the total length of the data */
 831     size=
 832         16*4+ /* size of indexes[] */
 833         indexes[_SPREP_INDEX_TRIE_SIZE]+
 834         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
 835
 836     if(length>=0) {
 837         if(length<size) {
 838             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
 839                              length);
 840             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 841             return 0;
 842         }
 843
 844         /* copy the data for inaccessible bytes */
 845         if(inBytes!=outBytes) {
 846             uprv_memcpy(outBytes, inBytes, size);
 847         }
 848
 849         offset=0;
 850
 851         /* swap the int32_t indexes[] */
 852         count=16*4;
 853         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
 854         offset+=count;
 855
 856         /* swap the UTrie */
 857         count=indexes[_SPREP_INDEX_TRIE_SIZE];
 858         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
 859         offset+=count;
 860
 861         /* swap the uint16_t mappingTable[] */
 862         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
 863         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
 864         //offset+=count;
 865     }
 866
 867     return headerSize+size;
 868 }
 869
 870 #endif /* #if !UCONFIG_NO_IDNA */