icuSources/common/usprep.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4  *******************************************************************************
   5  *
   6  *   Copyright (C) 2003-2016, International Business Machines
   7  *   Corporation and others.  All Rights Reserved.
   8  *
   9  *******************************************************************************
  10  *   file name:  usprep.cpp
  11  *   encoding:   UTF-8
  12  *   tab size:   8 (not used)
  13  *   indentation:4
  14  *
  15  *   created on: 2003jul2
  16  *   created by: Ram Viswanadha
  17  */
  18
  19 #include "unicode/utypes.h"
  20
  21 #if !UCONFIG_NO_IDNA
  22
  23 #include "unicode/usprep.h"
  24
  25 #include "unicode/normalizer2.h"
  26 #include "unicode/ustring.h"
  27 #include "unicode/uchar.h"
  28 #include "unicode/uversion.h"
  29 #include "umutex.h"
  30 #include "cmemory.h"
  31 #include "sprpimpl.h"
  32 #include "ustr_imp.h"
  33 #include "uhash.h"
  34 #include "cstring.h"
  35 #include "udataswp.h"
  36 #include "ucln_cmn.h"
  37 #include "ubidi_props.h"
  38 #include "uprops.h"
  39
  40 U_NAMESPACE_USE
  41
  42 U_CDECL_BEGIN
  43
/*
 * Static cache for already opened StringPrep profiles.
 * Keys are UStringPrepKey objects (see hashEntry()/compareEntries()) and
 * values are UStringPrepProfile objects. The table is created lazily by
 * createCache() and torn down by usprep_cleanup().
 */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
/* one-time initialization guard used by initCache() to run createCache() */
static icu::UInitOnce gSharedDataInitOnce;

/* held around cache lookups/updates and around profile reference-count changes */
static UMutex usprepMutex = U_MUTEX_INITIALIZER;

/* format version of spp file */
//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };

/*
 * the Unicode version of the sprep data;
 * written by isSPrepAcceptable() and read by loadData()
 */
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
  57
/*
 * Profile names must be aligned to UStringPrepProfileType:
 * usprep_openByType() uses the enum value directly as an index into this table.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
  75
  76 static UBool U_CALLCONV
  77 isSPrepAcceptable(void * /* context */,
  78              const char * /* type */,
  79              const char * /* name */,
  80              const UDataInfo *pInfo) {
  81     if(
  82         pInfo->size>=20 &&
  83         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
  84         pInfo->charsetFamily==U_CHARSET_FAMILY &&
  85         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
  86         pInfo->dataFormat[1]==0x50 &&
  87         pInfo->dataFormat[2]==0x52 &&
  88         pInfo->dataFormat[3]==0x50 &&
  89         pInfo->formatVersion[0]==3 &&
  90         pInfo->formatVersion[2]==UTRIE_SHIFT &&
  91         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
  92     ) {
  93         //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
  94         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
  95         return TRUE;
  96     } else {
  97         return FALSE;
  98     }
  99 }
 100
 101 static int32_t U_CALLCONV
 102 getSPrepFoldingOffset(uint32_t data) {
 103
 104     return (int32_t)data;
 105
 106 }
 107
 108 /* hashes an entry  */
 109 static int32_t U_CALLCONV
 110 hashEntry(const UHashTok parm) {
 111     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
 112     UHashTok namekey, pathkey;
 113     namekey.pointer = b->name;
 114     pathkey.pointer = b->path;
 115     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
 116 }
 117
 118 /* compares two entries */
 119 static UBool U_CALLCONV
 120 compareEntries(const UHashTok p1, const UHashTok p2) {
 121     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
 122     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
 123     UHashTok name1, name2, path1, path2;
 124     name1.pointer = b1->name;
 125     name2.pointer = b2->name;
 126     path1.pointer = b1->path;
 127     path2.pointer = b2->path;
 128     return ((UBool)(uhash_compareChars(name1, name2) &
 129         uhash_compareChars(path1, path2)));
 130 }
 131
 132 static void
 133 usprep_unload(UStringPrepProfile* data){
 134     udata_close(data->sprepData);
 135 }
 136
 137 static int32_t
 138 usprep_internal_flushCache(UBool noRefCount){
 139     UStringPrepProfile *profile = NULL;
 140     UStringPrepKey  *key  = NULL;
 141     int32_t pos = UHASH_FIRST;
 142     int32_t deletedNum = 0;
 143     const UHashElement *e;
 144
 145     /*
 146      * if shared data hasn't even been lazy evaluated yet
 147      * return 0
 148      */
 149     umtx_lock(&usprepMutex);
 150     if (SHARED_DATA_HASHTABLE == NULL) {
 151         umtx_unlock(&usprepMutex);
 152         return 0;
 153     }
 154
 155     /*creates an enumeration to iterate through every element in the table */
 156     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
 157     {
 158         profile = (UStringPrepProfile *) e->value.pointer;
 159         key  = (UStringPrepKey *) e->key.pointer;
 160
 161         if ((noRefCount== FALSE && profile->refCount == 0) ||
 162              noRefCount== TRUE) {
 163             deletedNum++;
 164             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
 165
 166             /* unload the data */
 167             usprep_unload(profile);
 168
 169             if(key->name != NULL) {
 170                 uprv_free(key->name);
 171                 key->name=NULL;
 172             }
 173             if(key->path != NULL) {
 174                 uprv_free(key->path);
 175                 key->path=NULL;
 176             }
 177             uprv_free(profile);
 178             uprv_free(key);
 179         }
 180
 181     }
 182     umtx_unlock(&usprepMutex);
 183
 184     return deletedNum;
 185 }
 186
 187 /* Works just like ucnv_flushCache()
 188 static int32_t
 189 usprep_flushCache(){
 190     return usprep_internal_flushCache(FALSE);
 191 }
 192 */
 193
 194 static UBool U_CALLCONV usprep_cleanup(void){
 195     if (SHARED_DATA_HASHTABLE != NULL) {
 196         usprep_internal_flushCache(TRUE);
 197         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
 198             uhash_close(SHARED_DATA_HASHTABLE);
 199             SHARED_DATA_HASHTABLE = NULL;
 200         }
 201     }
 202     gSharedDataInitOnce.reset();
 203     return (SHARED_DATA_HASHTABLE == NULL);
 204 }
 205 U_CDECL_END
 206
 207
 208 /** Initializes the cache for resources */
 209 static void U_CALLCONV
 210 createCache(UErrorCode &status) {
 211     SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
 212     if (U_FAILURE(status)) {
 213         SHARED_DATA_HASHTABLE = NULL;
 214     }
 215     ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
 216 }
 217
 218 static void
 219 initCache(UErrorCode *status) {
 220     umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
 221 }
 222
 223 static UBool U_CALLCONV
 224 loadData(UStringPrepProfile* profile,
 225          const char* path,
 226          const char* name,
 227          const char* type,
 228          UErrorCode* errorCode) {
 229     /* load Unicode SPREP data from file */
 230     UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
 231     UDataMemory *dataMemory;
 232     const int32_t *p=NULL;
 233     const uint8_t *pb;
 234     UVersionInfo normUnicodeVersion;
 235     int32_t normUniVer, sprepUniVer, normCorrVer;
 236
 237     if(errorCode==NULL || U_FAILURE(*errorCode)) {
 238         return 0;
 239     }
 240
 241     /* open the data outside the mutex block */
 242     //TODO: change the path
 243     dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
 244     if(U_FAILURE(*errorCode)) {
 245         return FALSE;
 246     }
 247
 248     p=(const int32_t *)udata_getMemory(dataMemory);
 249     pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
 250     utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
 251     _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
 252
 253
 254     if(U_FAILURE(*errorCode)) {
 255         udata_close(dataMemory);
 256         return FALSE;
 257     }
 258
 259     /* in the mutex block, set the data for this process */
 260     umtx_lock(&usprepMutex);
 261     if(profile->sprepData==NULL) {
 262         profile->sprepData=dataMemory;
 263         dataMemory=NULL;
 264         uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
 265         uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
 266     } else {
 267         p=(const int32_t *)udata_getMemory(profile->sprepData);
 268     }
 269     umtx_unlock(&usprepMutex);
 270     /* initialize some variables */
 271     profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
 272
 273     u_getUnicodeVersion(normUnicodeVersion);
 274     normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
 275                  (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
 276     sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
 277                   (dataVersion[2] << 8 ) + (dataVersion[3]);
 278     normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
 279
 280     if(U_FAILURE(*errorCode)){
 281         udata_close(dataMemory);
 282         return FALSE;
 283     }
 284     if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
 285         normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
 286         ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
 287       ){
 288         *errorCode = U_INVALID_FORMAT_ERROR;
 289         udata_close(dataMemory);
 290         return FALSE;
 291     }
 292     profile->isDataLoaded = TRUE;
 293
 294     /* if a different thread set it first, then close the extra data */
 295     if(dataMemory!=NULL) {
 296         udata_close(dataMemory); /* NULL if it was set correctly */
 297     }
 298
 299
 300     return profile->isDataLoaded;
 301 }
 302
 303 static UStringPrepProfile*
 304 usprep_getProfile(const char* path,
 305                   const char* name,
 306                   UErrorCode *status){
 307
 308     UStringPrepProfile* profile = NULL;
 309
 310     initCache(status);
 311
 312     if(U_FAILURE(*status)){
 313         return NULL;
 314     }
 315
 316     UStringPrepKey stackKey;
 317     /*
 318      * const is cast way to save malloc, strcpy and free calls
 319      * we use the passed in pointers for fetching the data from the
 320      * hash table which is safe
 321      */
 322     stackKey.name = (char*) name;
 323     stackKey.path = (char*) path;
 324
 325     /* fetch the data from the cache */
 326     umtx_lock(&usprepMutex);
 327     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
 328     if(profile != NULL) {
 329         profile->refCount++;
 330     }
 331     umtx_unlock(&usprepMutex);
 332
 333     if(profile == NULL) {
 334         /* else load the data and put the data in the cache */
 335         LocalMemory<UStringPrepProfile> newProfile;
 336         if(newProfile.allocateInsteadAndReset() == NULL) {
 337             *status = U_MEMORY_ALLOCATION_ERROR;
 338             return NULL;
 339         }
 340
 341         /* load the data */
 342         if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
 343             return NULL;
 344         }
 345
 346         /* get the options */
 347         newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
 348         newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
 349
 350         if(newProfile->checkBiDi) {
 351             newProfile->bdp = ubidi_getSingleton();
 352         }
 353
 354         LocalMemory<UStringPrepKey> key;
 355         LocalMemory<char> keyName;
 356         LocalMemory<char> keyPath;
 357         if( key.allocateInsteadAndReset() == NULL ||
 358             keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
 359             (path != NULL &&
 360              keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
 361          ) {
 362             *status = U_MEMORY_ALLOCATION_ERROR;
 363             usprep_unload(newProfile.getAlias());
 364             return NULL;
 365         }
 366
 367         umtx_lock(&usprepMutex);
 368         // If another thread already inserted the same key/value, refcount and cleanup our thread data
 369         profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
 370         if(profile != NULL) {
 371             profile->refCount++;
 372             usprep_unload(newProfile.getAlias());
 373         }
 374         else {
 375             /* initialize the key members */
 376             key->name = keyName.orphan();
 377             uprv_strcpy(key->name, name);
 378             if(path != NULL){
 379                 key->path = keyPath.orphan();
 380                 uprv_strcpy(key->path, path);
 381             }
 382             profile = newProfile.orphan();
 383
 384             /* add the data object to the cache */
 385             profile->refCount = 1;
 386             uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
 387         }
 388         umtx_unlock(&usprepMutex);
 389     }
 390
 391     return profile;
 392 }
 393
 394 U_CAPI UStringPrepProfile* U_EXPORT2
 395 usprep_open(const char* path,
 396             const char* name,
 397             UErrorCode* status){
 398
 399     if(status == NULL || U_FAILURE(*status)){
 400         return NULL;
 401     }
 402
 403     /* initialize the profile struct members */
 404     return usprep_getProfile(path,name,status);
 405 }
 406
 407 U_CAPI UStringPrepProfile* U_EXPORT2
 408 usprep_openByType(UStringPrepProfileType type,
 409                                   UErrorCode* status) {
 410     if(status == NULL || U_FAILURE(*status)){
 411         return NULL;
 412     }
 413     int32_t index = (int32_t)type;
 414     if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
 415         *status = U_ILLEGAL_ARGUMENT_ERROR;
 416         return NULL;
 417     }
 418     return usprep_open(NULL, PROFILE_NAMES[index], status);
 419 }
 420
 421 U_CAPI void U_EXPORT2
 422 usprep_close(UStringPrepProfile* profile){
 423     if(profile==NULL){
 424         return;
 425     }
 426
 427     umtx_lock(&usprepMutex);
 428     /* decrement the ref count*/
 429     if(profile->refCount > 0){
 430         profile->refCount--;
 431     }
 432     umtx_unlock(&usprepMutex);
 433
 434 }
 435
 436 U_CFUNC void
 437 uprv_syntaxError(const UChar* rules,
 438                  int32_t pos,
 439                  int32_t rulesLen,
 440                  UParseError* parseError){
 441     if(parseError == NULL){
 442         return;
 443     }
 444     parseError->offset = pos;
 445     parseError->line = 0 ; // we are not using line numbers
 446
 447     // for pre-context
 448     int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
 449     int32_t limit = pos;
 450
 451     u_memcpy(parseError->preContext,rules+start,limit-start);
 452     //null terminate the buffer
 453     parseError->preContext[limit-start] = 0;
 454
 455     // for post-context; include error rules[pos]
 456     start = pos;
 457     limit = start + (U_PARSE_CONTEXT_LEN-1);
 458     if (limit > rulesLen) {
 459         limit = rulesLen;
 460     }
 461     if (start < rulesLen) {
 462         u_memcpy(parseError->postContext,rules+start,limit-start);
 463     }
 464     //null terminate the buffer
 465     parseError->postContext[limit-start]= 0;
 466 }
 467
 468
 469 static inline UStringPrepType
 470 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
 471
 472     UStringPrepType type;
 473     if(trieWord == 0){
 474         /*
 475          * Initial value stored in the mapping table
 476          * just return USPREP_TYPE_LIMIT .. so that
 477          * the source codepoint is copied to the destination
 478          */
 479         type = USPREP_TYPE_LIMIT;
 480         isIndex =FALSE;
 481         value = 0;
 482     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
 483         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
 484         isIndex =FALSE;
 485         value = 0;
 486     }else{
 487         /* get the type */
 488         type = USPREP_MAP;
 489         /* ascertain if the value is index or delta */
 490         if(trieWord & 0x02){
 491             isIndex = TRUE;
 492             value = trieWord  >> 2; //mask off the lower 2 bits and shift
 493         }else{
 494             isIndex = FALSE;
 495             value = (int16_t)trieWord;
 496             value =  (value >> 2);
 497         }
 498
 499         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
 500             type = USPREP_DELETE;
 501             isIndex =FALSE;
 502             value = 0;
 503         }
 504     }
 505     return type;
 506 }
 507
 508 // TODO: change to writing to UnicodeString not UChar *
 509 static int32_t
 510 usprep_map(  const UStringPrepProfile* profile,
 511              const UChar* src, int32_t srcLength,
 512              UChar* dest, int32_t destCapacity,
 513              int32_t options,
 514              UParseError* parseError,
 515              UErrorCode* status ){
 516
 517     uint16_t result;
 518     int32_t destIndex=0;
 519     int32_t srcIndex;
 520     UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
 521     UStringPrepType type;
 522     int16_t value;
 523     UBool isIndex;
 524     const int32_t* indexes = profile->indexes;
 525
 526     // no error checking the caller check for error and arguments
 527     // no string length check the caller finds out the string length
 528
 529     for(srcIndex=0;srcIndex<srcLength;){
 530         UChar32 ch;
 531
 532         U16_NEXT(src,srcIndex,srcLength,ch);
 533
 534         result=0;
 535
 536         UTRIE_GET16(&profile->sprepTrie,ch,result);
 537
 538         type = getValues(result, value, isIndex);
 539
 540         // check if the source codepoint is unassigned
 541         if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
 542
 543             uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
 544             *status = U_STRINGPREP_UNASSIGNED_ERROR;
 545             return 0;
 546
 547         }else if(type == USPREP_MAP){
 548
 549             int32_t index, length;
 550
 551             if(isIndex){
 552                 index = value;
 553                 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
 554                          index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
 555                     length = 1;
 556                 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
 557                          index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
 558                     length = 2;
 559                 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
 560                          index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
 561                     length = 3;
 562                 }else{
 563                     length = profile->mappingData[index++];
 564
 565                 }
 566
 567                 /* copy mapping to destination */
 568                 for(int32_t i=0; i< length; i++){
 569                     if(destIndex < destCapacity  ){
 570                         dest[destIndex] = profile->mappingData[index+i];
 571                     }
 572                     destIndex++; /* for pre-flighting */
 573                 }
 574                 continue;
 575             }else{
 576                 // subtract the delta to arrive at the code point
 577                 ch -= value;
 578             }
 579
 580         }else if(type==USPREP_DELETE){
 581              // just consume the codepoint and contine
 582             continue;
 583         }
 584         //copy the code point into destination
 585         if(ch <= 0xFFFF){
 586             if(destIndex < destCapacity ){
 587                 dest[destIndex] = (UChar)ch;
 588             }
 589             destIndex++;
 590         }else{
 591             if(destIndex+1 < destCapacity ){
 592                 dest[destIndex]   = U16_LEAD(ch);
 593                 dest[destIndex+1] = U16_TRAIL(ch);
 594             }
 595             destIndex +=2;
 596         }
 597
 598     }
 599
 600     return u_terminateUChars(dest, destCapacity, destIndex, status);
 601 }
 602
 603 /*
 604    1) Map -- For each character in the input, check if it has a mapping
 605       and, if so, replace it with its mapping.
 606
 607    2) Normalize -- Possibly normalize the result of step 1 using Unicode
 608       normalization.
 609
 610    3) Prohibit -- Check for any characters that are not allowed in the
 611       output.  If any are found, return an error.
 612
 613    4) Check bidi -- Possibly check for right-to-left characters, and if
 614       any are found, make sure that the whole string satisfies the
 615       requirements for bidirectional strings.  If the string does not
 616       satisfy the requirements for bidirectional strings, return an
 617       error.
 618       [Unicode3.2] defines several bidirectional categories; each character
 619        has one bidirectional category assigned to it.  For the purposes of
 620        the requirements below, an "RandALCat character" is a character that
 621        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
 622        is a character that has Unicode bidirectional category "L".  Note
 623
 624
 625        that there are many characters which fall in neither of the above
 626        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
 627        this because they have bidirectional category "EN".
 628
 629        In any profile that specifies bidirectional character handling, all
 630        three of the following requirements MUST be met:
 631
 632        1) The characters in section 5.8 MUST be prohibited.
 633
 634        2) If a string contains any RandALCat character, the string MUST NOT
 635           contain any LCat character.
 636
 637        3) If a string contains any RandALCat character, a RandALCat
 638           character MUST be the first character of the string, and a
 639           RandALCat character MUST be the last character of the string.
 640 */
 641 U_CAPI int32_t U_EXPORT2
 642 usprep_prepare(   const UStringPrepProfile* profile,
 643                   const UChar* src, int32_t srcLength,
 644                   UChar* dest, int32_t destCapacity,
 645                   int32_t options,
 646                   UParseError* parseError,
 647                   UErrorCode* status ){
 648
 649     // check error status
 650     if(U_FAILURE(*status)){
 651         return 0;
 652     }
 653
 654     //check arguments
 655     if(profile==NULL ||
 656             (src==NULL ? srcLength!=0 : srcLength<-1) ||
 657             (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
 658         *status=U_ILLEGAL_ARGUMENT_ERROR;
 659         return 0;
 660     }
 661
 662     //get the string length
 663     if(srcLength < 0){
 664         srcLength = u_strlen(src);
 665     }
 666     // map
 667     UnicodeString s1;
 668     UChar *b1 = s1.getBuffer(srcLength);
 669     if(b1==NULL){
 670         *status = U_MEMORY_ALLOCATION_ERROR;
 671         return 0;
 672     }
 673     int32_t b1Len = usprep_map(profile, src, srcLength,
 674                                b1, s1.getCapacity(), options, parseError, status);
 675     s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
 676
 677     if(*status == U_BUFFER_OVERFLOW_ERROR){
 678         // redo processing of string
 679         /* we do not have enough room so grow the buffer*/
 680         b1 = s1.getBuffer(b1Len);
 681         if(b1==NULL){
 682             *status = U_MEMORY_ALLOCATION_ERROR;
 683             return 0;
 684         }
 685
 686         *status = U_ZERO_ERROR; // reset error
 687         b1Len = usprep_map(profile, src, srcLength,
 688                            b1, s1.getCapacity(), options, parseError, status);
 689         s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
 690     }
 691     if(U_FAILURE(*status)){
 692         return 0;
 693     }
 694
 695     // normalize
 696     UnicodeString s2;
 697     if(profile->doNFKC){
 698         const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
 699         FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
 700         if(U_FAILURE(*status)){
 701             return 0;
 702         }
 703         fn2.normalize(s1, s2, *status);
 704     }else{
 705         s2.fastCopyFrom(s1);
 706     }
 707     if(U_FAILURE(*status)){
 708         return 0;
 709     }
 710
 711     // Prohibit and checkBiDi in one pass
 712     const UChar *b2 = s2.getBuffer();
 713     int32_t b2Len = s2.length();
 714     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
 715     UBool leftToRight=FALSE, rightToLeft=FALSE;
 716     int32_t rtlPos =-1, ltrPos =-1;
 717
 718     for(int32_t b2Index=0; b2Index<b2Len;){
 719         UChar32 ch = 0;
 720         U16_NEXT(b2, b2Index, b2Len, ch);
 721
 722         uint16_t result;
 723         UTRIE_GET16(&profile->sprepTrie,ch,result);
 724
 725         int16_t value;
 726         UBool isIndex;
 727         UStringPrepType type = getValues(result, value, isIndex);
 728
 729         if( type == USPREP_PROHIBITED ||
 730             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
 731            ){
 732             *status = U_STRINGPREP_PROHIBITED_ERROR;
 733             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
 734             return 0;
 735         }
 736
 737         if(profile->checkBiDi) {
 738             direction = ubidi_getClass(profile->bdp, ch);
 739             if(firstCharDir == U_CHAR_DIRECTION_COUNT){
 740                 firstCharDir = direction;
 741             }
 742             if(direction == U_LEFT_TO_RIGHT){
 743                 leftToRight = TRUE;
 744                 ltrPos = b2Index-1;
 745             }
 746             if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
 747                 rightToLeft = TRUE;
 748                 rtlPos = b2Index-1;
 749             }
 750         }
 751     }
 752     if(profile->checkBiDi == TRUE){
 753         // satisfy 2
 754         if( leftToRight == TRUE && rightToLeft == TRUE){
 755             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
 756             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
 757             return 0;
 758         }
 759
 760         //satisfy 3
 761         if( rightToLeft == TRUE &&
 762             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
 763               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
 764            ){
 765             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
 766             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
 767             return FALSE;
 768         }
 769     }
 770     return s2.extract(dest, destCapacity, *status);
 771 }
 772
 773
 774 /* data swapping ------------------------------------------------------------ */
 775
 776 U_CAPI int32_t U_EXPORT2
 777 usprep_swap(const UDataSwapper *ds,
 778             const void *inData, int32_t length, void *outData,
 779             UErrorCode *pErrorCode) {
 780     const UDataInfo *pInfo;
 781     int32_t headerSize;
 782
 783     const uint8_t *inBytes;
 784     uint8_t *outBytes;
 785
 786     const int32_t *inIndexes;
 787     int32_t indexes[16];
 788
 789     int32_t i, offset, count, size;
 790
 791     /* udata_swapDataHeader checks the arguments */
 792     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
 793     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
 794         return 0;
 795     }
 796
 797     /* check data format and format version */
 798     pInfo=(const UDataInfo *)((const char *)inData+4);
 799     if(!(
 800         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
 801         pInfo->dataFormat[1]==0x50 &&
 802         pInfo->dataFormat[2]==0x52 &&
 803         pInfo->dataFormat[3]==0x50 &&
 804         pInfo->formatVersion[0]==3
 805     )) {
 806         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
 807                          pInfo->dataFormat[0], pInfo->dataFormat[1],
 808                          pInfo->dataFormat[2], pInfo->dataFormat[3],
 809                          pInfo->formatVersion[0]);
 810         *pErrorCode=U_UNSUPPORTED_ERROR;
 811         return 0;
 812     }
 813
 814     inBytes=(const uint8_t *)inData+headerSize;
 815     outBytes=(uint8_t *)outData+headerSize;
 816
 817     inIndexes=(const int32_t *)inBytes;
 818
 819     if(length>=0) {
 820         length-=headerSize;
 821         if(length<16*4) {
 822             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
 823                              length);
 824             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 825             return 0;
 826         }
 827     }
 828
 829     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
 830     for(i=0; i<16; ++i) {
 831         indexes[i]=udata_readInt32(ds, inIndexes[i]);
 832     }
 833
 834     /* calculate the total length of the data */
 835     size=
 836         16*4+ /* size of indexes[] */
 837         indexes[_SPREP_INDEX_TRIE_SIZE]+
 838         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
 839
 840     if(length>=0) {
 841         if(length<size) {
 842             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
 843                              length);
 844             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 845             return 0;
 846         }
 847
 848         /* copy the data for inaccessible bytes */
 849         if(inBytes!=outBytes) {
 850             uprv_memcpy(outBytes, inBytes, size);
 851         }
 852
 853         offset=0;
 854
 855         /* swap the int32_t indexes[] */
 856         count=16*4;
 857         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
 858         offset+=count;
 859
 860         /* swap the UTrie */
 861         count=indexes[_SPREP_INDEX_TRIE_SIZE];
 862         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
 863         offset+=count;
 864
 865         /* swap the uint16_t mappingTable[] */
 866         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
 867         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
 868         //offset+=count;
 869     }
 870
 871     return headerSize+size;
 872 }
 873
 874 #endif /* #if !UCONFIG_NO_IDNA */