icuSources/common/usprep.cpp

   1 /*
   2  *******************************************************************************
   3  *
   4  *   Copyright (C) 2003-2004, International Business Machines
   5  *   Corporation and others.  All Rights Reserved.
   6  *
   7  *******************************************************************************
   8  *   file name:  usprep.cpp
   9  *   encoding:   US-ASCII
  10  *   tab size:   8 (not used)
  11  *   indentation:4
  12  *
  13  *   created on: 2003jul2
  14  *   created by: Ram Viswanadha
  15  */
  16
  17 #include "unicode/utypes.h"
  18
  19 #if !UCONFIG_NO_IDNA
  20
  21 #include "unicode/usprep.h"
  22
  23 #include "unicode/unorm.h"
  24 #include "unicode/ustring.h"
  25 #include "unicode/uchar.h"
  26 #include "unicode/uversion.h"
  27 #include "umutex.h"
  28 #include "cmemory.h"
  29 #include "sprpimpl.h"
  30 #include "ustr_imp.h"
  31 #include "uhash.h"
  32 #include "cstring.h"
  33 #include "udataswp.h"
  34 #include "ucln_cmn.h"
  35 #include "unormimp.h"
  36
  37 U_CDECL_BEGIN
  38
/*
 * Static cache for already opened StringPrep profiles.
 * Keys are UStringPrepKey (name + path) and values are UStringPrepProfile;
 * the table is created lazily by initCache() and emptied by
 * usprep_internal_flushCache().
 */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;

/* mutex taken around modifications of the cache and of profile reference counts */
static UMTX usprepMutex = NULL;

/* format version of spp file; filled in by isSPrepAcceptable() */
static uint8_t formatVersion[4]={ 0, 0, 0, 0 };

/* the Unicode version of the sprep data; filled in by isSPrepAcceptable(), read by loadData() */
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
  51
  52 static UBool U_CALLCONV
  53 isSPrepAcceptable(void * /* context */,
  54              const char * /* type */,
  55              const char * /* name */,
  56              const UDataInfo *pInfo) {
  57     if(
  58         pInfo->size>=20 &&
  59         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
  60         pInfo->charsetFamily==U_CHARSET_FAMILY &&
  61         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
  62         pInfo->dataFormat[1]==0x50 &&
  63         pInfo->dataFormat[2]==0x52 &&
  64         pInfo->dataFormat[3]==0x50 &&
  65         pInfo->formatVersion[0]==3 &&
  66         pInfo->formatVersion[2]==UTRIE_SHIFT &&
  67         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
  68     ) {
  69         uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
  70         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
  71         return TRUE;
  72     } else {
  73         return FALSE;
  74     }
  75 }
  76
  77 static int32_t U_CALLCONV
  78 getSPrepFoldingOffset(uint32_t data) {
  79
  80     return (int32_t)data;
  81
  82 }
  83
  84 /* hashes an entry  */
  85 static int32_t U_EXPORT2 U_CALLCONV
  86 hashEntry(const UHashTok parm) {
  87     UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
  88     UHashTok namekey, pathkey;
  89     namekey.pointer = b->name;
  90     pathkey.pointer = b->path;
  91     return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
  92 }
  93
  94 /* compares two entries */
  95 static UBool U_EXPORT2 U_CALLCONV
  96 compareEntries(const UHashTok p1, const UHashTok p2) {
  97     UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
  98     UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
  99     UHashTok name1, name2, path1, path2;
 100     name1.pointer = b1->name;
 101     name2.pointer = b2->name;
 102     path1.pointer = b1->path;
 103     path2.pointer = b2->path;
 104     return ((UBool)(uhash_compareChars(name1, name2) &
 105         uhash_compareChars(path1, path2)));
 106 }
 107
 108 static void
 109 usprep_unload(UStringPrepProfile* data){
 110     udata_close(data->sprepData);
 111 }
 112
 113 static int32_t
 114 usprep_internal_flushCache(UBool noRefCount){
 115     UStringPrepProfile *profile = NULL;
 116     UStringPrepKey  *key  = NULL;
 117     int32_t pos = -1;
 118     int32_t deletedNum = 0;
 119     const UHashElement *e;
 120
 121     /*
 122      * if shared data hasn't even been lazy evaluated yet
 123      * return 0
 124      */
 125     umtx_lock(&usprepMutex);
 126     if (SHARED_DATA_HASHTABLE == NULL) {
 127         umtx_unlock(&usprepMutex);
 128         return 0;
 129     }
 130
 131     /*creates an enumeration to iterate through every element in the table */
 132     while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
 133     {
 134         profile = (UStringPrepProfile *) e->value.pointer;
 135         key  = (UStringPrepKey *) e->key.pointer;
 136
 137         if ((noRefCount== FALSE && profile->refCount == 0) ||
 138              noRefCount== TRUE) {
 139             deletedNum++;
 140             uhash_removeElement(SHARED_DATA_HASHTABLE, e);
 141
 142             /* unload the data */
 143             usprep_unload(profile);
 144
 145             if(key->name != NULL) {
 146                 uprv_free(key->name);
 147                 key->name=NULL;
 148             }
 149             if(key->path != NULL) {
 150                 uprv_free(key->path);
 151                 key->path=NULL;
 152             }
 153             uprv_free(profile);
 154             uprv_free(key);
 155         }
 156
 157     }
 158     umtx_unlock(&usprepMutex);
 159
 160     return deletedNum;
 161 }
 162
 163 /* Works just like ucnv_flushCache()
 164 static int32_t
 165 usprep_flushCache(){
 166     return usprep_internal_flushCache(FALSE);
 167 }
 168 */
 169
 170 static UBool U_CALLCONV usprep_cleanup(void){
 171     if (SHARED_DATA_HASHTABLE != NULL) {
 172         usprep_internal_flushCache(TRUE);
 173         if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
 174             uhash_close(SHARED_DATA_HASHTABLE);
 175             SHARED_DATA_HASHTABLE = NULL;
 176         }
 177     }
 178
 179     umtx_destroy(&usprepMutex);             /* Don't worry about destroying the mutex even  */
 180                                             /*  if the hash table still exists.  The mutex  */
 181                                             /*  will lazily re-init  itself if needed.      */
 182     return (SHARED_DATA_HASHTABLE == NULL);
 183 }
 184 U_CDECL_END
 185
 186 static void
 187 usprep_init() {
 188     umtx_init(&usprepMutex);
 189 }
 190
 191 /** Initializes the cache for resources */
 192 static void
 193 initCache(UErrorCode *status) {
 194     UBool makeCache = FALSE;
 195     umtx_lock(&usprepMutex);
 196     makeCache = (SHARED_DATA_HASHTABLE ==  NULL);
 197     umtx_unlock(&usprepMutex);
 198     if(makeCache) {
 199         UHashtable *newCache = uhash_open(hashEntry, compareEntries, status);
 200         if (U_FAILURE(*status)) {
 201             return;
 202         }
 203         umtx_lock(&usprepMutex);
 204         if(SHARED_DATA_HASHTABLE == NULL) {
 205             SHARED_DATA_HASHTABLE = newCache;
 206             ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
 207             newCache = NULL;
 208         }
 209         umtx_unlock(&usprepMutex);
 210         if(newCache != NULL) {
 211             uhash_close(newCache);
 212         }
 213     }
 214 }
 215
/*
 * Opens the StringPrep data item (path, type, name), unserializes its trie and
 * stores the data memory, the index array, the trie and the mapping-table
 * pointer in *profile.
 *
 * @param profile   profile to fill in; its sprepData member is expected to be
 *                  NULL on entry for the data to be attached
 * @param path      data path passed to udata_openChoice()
 * @param name      data item name passed to udata_openChoice()
 * @param type      data item type passed to udata_openChoice()
 * @param errorCode in/out error code
 * @return TRUE when the data was loaded (profile->isDataLoaded), otherwise FALSE
 *
 * NOTE(review): on the two failure returns after the mutex block, dataMemory is
 * NULL if ownership was already handed to profile->sprepData, so the data
 * stays attached to the profile there -- the caller appears to be responsible
 * for closing it.
 */
static UBool U_CALLCONV
loadData(UStringPrepProfile* profile,
         const char* path,
         const char* name,
         const char* type,
         UErrorCode* errorCode) {
    /* load Unicode SPREP data from file */
    UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
    UDataMemory *dataMemory;
    const int32_t *p=NULL;
    const uint8_t *pb;
    UVersionInfo normUnicodeVersion;
    int32_t normUniVer, sprepUniVer, normCorrVer;

    if(errorCode==NULL || U_FAILURE(*errorCode)) {
        return 0;
    }

    /* open the data outside the mutex block */
    //TODO: change the path
    dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
    if(U_FAILURE(*errorCode)) {
        return FALSE;
    }

    /* the data starts with the int32_t index array, followed by the serialized trie */
    p=(const int32_t *)udata_getMemory(dataMemory);
    pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
    utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;


    if(U_FAILURE(*errorCode)) {
        udata_close(dataMemory);
        return FALSE;
    }

    /* in the mutex block, set the data for this process */
    umtx_lock(&usprepMutex);
    if(profile->sprepData==NULL) {
        /* hand ownership of the data memory to the profile */
        profile->sprepData=dataMemory;
        dataMemory=NULL;
        uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
        uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    } else {
        /* the profile already has data: use that copy, ours is closed below */
        p=(const int32_t *)udata_getMemory(profile->sprepData);
    }
    umtx_unlock(&usprepMutex);
    /* initialize some variables */
    /* the mapping table follows the trie, whose byte size is stored in the indexes */
    profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);

    /* pack each 4-part version into one int32_t so versions can be compared numerically */
    unorm_getUnicodeVersion(&normUnicodeVersion, errorCode);
    normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
                 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
    sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
                  (dataVersion[2] << 8 ) + (dataVersion[3]);
    normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];

    if(U_FAILURE(*errorCode)){
        udata_close(dataMemory);
        return FALSE;
    }
    /* reject the data when all three conditions below hold */
    if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Version of the normalization data */
        normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Version of the normalization data */
        ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
      ){
        *errorCode = U_INVALID_FORMAT_ERROR;
        udata_close(dataMemory);
        return FALSE;
    }
    profile->isDataLoaded = TRUE;

    /* if a different thread set it first, then close the extra data */
    if(dataMemory!=NULL) {
        udata_close(dataMemory); /* NULL if it was set correctly */
    }


    return profile->isDataLoaded;
}
 295
 296 static UStringPrepProfile*
 297 usprep_getProfile(const char* path,
 298                   const char* name,
 299                   UErrorCode *status){
 300
 301     UStringPrepProfile* profile = NULL;
 302
 303     initCache(status);
 304
 305     if(U_FAILURE(*status)){
 306         return NULL;
 307     }
 308
 309     UStringPrepKey stackKey;
 310     /*
 311      * const is cast way to save malloc, strcpy and free calls
 312      * we use the passed in pointers for fetching the data from the
 313      * hash table which is safe
 314      */
 315     stackKey.name = (char*) name;
 316     stackKey.path = (char*) path;
 317
 318     /* fetch the data from the cache */
 319     profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
 320
 321     if(profile == NULL){
 322         UStringPrepKey* key   = (UStringPrepKey*) uprv_malloc(sizeof(UStringPrepKey));
 323         if(key == NULL){
 324             *status = U_MEMORY_ALLOCATION_ERROR;
 325             return NULL;
 326         }
 327         /* else load the data and put the data in the cache */
 328         profile = (UStringPrepProfile*) uprv_malloc(sizeof(UStringPrepProfile));
 329         if(profile == NULL){
 330             *status = U_MEMORY_ALLOCATION_ERROR;
 331             uprv_free(key);
 332             return NULL;
 333         }
 334
 335         /* initialize the data struct members */
 336         uprv_memset(profile->indexes,0,sizeof(profile->indexes));
 337         profile->mappingData = NULL;
 338         profile->sprepData   = NULL;
 339         profile->refCount    = 0;
 340
 341         /* initialize the  key memebers */
 342         key->name  = (char*) uprv_malloc(uprv_strlen(name)+1);
 343         if(key->name == NULL){
 344             *status = U_MEMORY_ALLOCATION_ERROR;
 345             uprv_free(key);
 346             uprv_free(profile);
 347             return NULL;
 348         }
 349
 350         uprv_strcpy(key->name, name);
 351
 352         key->path=NULL;
 353
 354         if(path != NULL){
 355             key->path      = (char*) uprv_malloc(uprv_strlen(path)+1);
 356             if(key->path == NULL){
 357                 *status = U_MEMORY_ALLOCATION_ERROR;
 358                 uprv_free(key->path);
 359                 uprv_free(key);
 360                 uprv_free(profile);
 361                 return NULL;
 362             }
 363             uprv_strcpy(key->path, path);
 364         }
 365
 366         /* load the data */
 367         if(!loadData(profile, path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
 368             return NULL;
 369         }
 370
 371         /* get the options */
 372         profile->doNFKC            = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
 373         profile->checkBiDi         = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
 374
 375         umtx_lock(&usprepMutex);
 376         /* add the data object to the cache */
 377         uhash_put(SHARED_DATA_HASHTABLE, key, profile, status);
 378         umtx_unlock(&usprepMutex);
 379     }
 380     umtx_lock(&usprepMutex);
 381     /* increment the refcount */
 382     profile->refCount++;
 383     umtx_unlock(&usprepMutex);
 384
 385     return profile;
 386 }
 387
 388 U_CAPI UStringPrepProfile* U_EXPORT2
 389 usprep_open(const char* path,
 390             const char* name,
 391             UErrorCode* status){
 392
 393     if(status == NULL || U_FAILURE(*status)){
 394         return NULL;
 395     }
 396     /* initialize the mutex */
 397     usprep_init();
 398
 399     /* initialize the profile struct members */
 400     return usprep_getProfile(path,name,status);;
 401 }
 402
 403 U_CAPI void U_EXPORT2
 404 usprep_close(UStringPrepProfile* profile){
 405     if(profile==NULL){
 406         return;
 407     }
 408
 409     umtx_lock(&usprepMutex);
 410     /* decrement the ref count*/
 411     if(profile->refCount > 0){
 412         profile->refCount--;
 413     }
 414     umtx_unlock(&usprepMutex);
 415
 416 }
 417
 418 U_CFUNC void
 419 uprv_syntaxError(const UChar* rules,
 420                  int32_t pos,
 421                  int32_t rulesLen,
 422                  UParseError* parseError){
 423     if(parseError == NULL){
 424         return;
 425     }
 426     parseError->offset = pos;
 427     parseError->line = 0 ; // we are not using line numbers
 428
 429     // for pre-context
 430     int32_t start = (pos <=U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
 431     int32_t limit = pos;
 432
 433     u_memcpy(parseError->preContext,rules+start,limit-start);
 434     //null terminate the buffer
 435     parseError->preContext[limit-start] = 0;
 436
 437     // for post-context; include error rules[pos]
 438     start = pos;
 439     limit = start + (U_PARSE_CONTEXT_LEN-1);
 440     if (limit > rulesLen) {
 441         limit = rulesLen;
 442     }
 443     if (start < rulesLen) {
 444         u_memcpy(parseError->postContext,rules+start,limit-start);
 445     }
 446     //null terminate the buffer
 447     parseError->postContext[limit-start]= 0;
 448 }
 449
 450
 451 static inline UStringPrepType
 452 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
 453
 454     UStringPrepType type;
 455     if(trieWord == 0){
 456         /*
 457          * Initial value stored in the mapping table
 458          * just return USPREP_TYPE_LIMIT .. so that
 459          * the source codepoint is copied to the destination
 460          */
 461         type = USPREP_TYPE_LIMIT;
 462     }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
 463         type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
 464     }else{
 465         /* get the type */
 466         type = USPREP_MAP;
 467         /* ascertain if the value is index or delta */
 468         if(trieWord & 0x02){
 469             isIndex = TRUE;
 470             value = trieWord  >> 2; //mask off the lower 2 bits and shift
 471
 472         }else{
 473             isIndex = FALSE;
 474             value = (int16_t)trieWord;
 475             value =  (value >> 2);
 476
 477         }
 478
 479         if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
 480             type = USPREP_DELETE;
 481             isIndex =FALSE;
 482             value = 0;
 483         }
 484     }
 485     return type;
 486 }
 487
 488
 489
/*
 * Mapping step of StringPrep: looks every source code point up in the
 * profile's trie and writes the mapped text to dest.
 *
 * @param profile      profile providing the trie, indexes and mapping table
 * @param src          source text
 * @param srcLength    number of UChars in src (the caller resolves -1)
 * @param dest         destination buffer
 * @param destCapacity capacity of dest in UChars
 * @param options      USPREP_ALLOW_UNASSIGNED permits unassigned code points
 * @param parseError   receives the error context for an unassigned code point
 * @param status       in/out error code; set to U_STRINGPREP_UNASSIGNED_ERROR
 *                     for a disallowed unassigned code point
 * @return the value of u_terminateUChars() for the full output length; the
 *         length keeps being counted when dest is too small (pre-flighting).
 *         0 is returned after an unassigned-code-point error.
 */
static int32_t
usprep_map(  const UStringPrepProfile* profile,
             const UChar* src, int32_t srcLength,
             UChar* dest, int32_t destCapacity,
             int32_t options,
             UParseError* parseError,
             UErrorCode* status ){

    uint16_t result;
    int32_t destIndex=0;
    int32_t srcIndex;
    UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
    UStringPrepType type;
    int16_t value;
    UBool isIndex;
    const int32_t* indexes = profile->indexes;

    // no error checking the caller check for error and arguments
    // no string length check the caller finds out the string length

    for(srcIndex=0;srcIndex<srcLength;){
        UChar32 ch;

        U16_NEXT(src,srcIndex,srcLength,ch);

        result=0;

        UTRIE_GET16(&profile->sprepTrie,ch,result);

        type = getValues(result, value, isIndex);

        // check if the source codepoint is unassigned
        if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){

            // srcIndex is already past ch, so step back to where ch starts
            uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
            *status = U_STRINGPREP_UNASSIGNED_ERROR;
            return 0;

        }else if(type == USPREP_MAP){

            int32_t index, length;

            if(isIndex){
                // value is an index into mappingData; the index range it falls
                // into determines how many UChars the mapping has
                index = value;
                if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
                         index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
                    length = 1;
                }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
                         index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
                    length = 2;
                }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
                         index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
                    length = 3;
                }else{
                    // otherwise the length is stored in front of the mapping
                    length = profile->mappingData[index++];

                }

                /* copy mapping to destination */
                for(int32_t i=0; i< length; i++){
                    if(destIndex < destCapacity  ){
                        dest[destIndex] = profile->mappingData[index+i];
                    }
                    destIndex++; /* for pre-flighting */
                }
                continue;
            }else{
                // subtract the delta to arrive at the code point
                ch -= value;
            }

        }else if(type==USPREP_DELETE){
             // just consume the codepoint and continue
            continue;
        }
        //copy the code point into destination
        if(ch <= 0xFFFF){
            if(destIndex < destCapacity ){
                dest[destIndex] = (UChar)ch;
            }
            destIndex++;
        }else{
            // supplementary code point: written only if both surrogates fit
            if(destIndex+1 < destCapacity ){
                dest[destIndex]   = U16_LEAD(ch);
                dest[destIndex+1] = U16_TRAIL(ch);
            }
            destIndex +=2;
        }

    }

    return u_terminateUChars(dest, destCapacity, destIndex, status);
}
 583
 584
 585 static int32_t
 586 usprep_normalize(   const UChar* src, int32_t srcLength,
 587                     UChar* dest, int32_t destCapacity,
 588                     UErrorCode* status ){
 589     /*
 590      * Option UNORM_BEFORE_PRI_29:
 591      *
 592      * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
 593      * requires strict adherence to Unicode 3.2 normalization,
 594      * including buggy composition from before fixing Public Review Issue #29.
 595      * Note that this results in some valid but nonsensical text to be
 596      * either corrupted or rejected, depending on the text.
 597      * See http://www.unicode.org/review/resolved-pri.html#pri29
 598      * See unorm.cpp and cnormtst.c
 599      */
 600     return unorm_normalize(
 601         src, srcLength,
 602         UNORM_NFKC, UNORM_UNICODE_3_2|UNORM_BEFORE_PRI_29,
 603         dest, destCapacity,
 604         status);
 605 }
 606
 607
 608  /*
 609    1) Map -- For each character in the input, check if it has a mapping
 610       and, if so, replace it with its mapping.
 611
 612    2) Normalize -- Possibly normalize the result of step 1 using Unicode
 613       normalization.
 614
 615    3) Prohibit -- Check for any characters that are not allowed in the
 616       output.  If any are found, return an error.
 617
 618    4) Check bidi -- Possibly check for right-to-left characters, and if
 619       any are found, make sure that the whole string satisfies the
 620       requirements for bidirectional strings.  If the string does not
 621       satisfy the requirements for bidirectional strings, return an
 622       error.
 623       [Unicode3.2] defines several bidirectional categories; each character
 624        has one bidirectional category assigned to it.  For the purposes of
 625        the requirements below, an "RandALCat character" is a character that
 626        has Unicode bidirectional categories "R" or "AL"; an "LCat character"
       is a character that has Unicode bidirectional category "L".  Note
       that there are many characters which fall in neither of the above
 631        definitions; Latin digits (<U+0030> through <U+0039>) are examples of
 632        this because they have bidirectional category "EN".
 633
 634        In any profile that specifies bidirectional character handling, all
 635        three of the following requirements MUST be met:
 636
 637        1) The characters in section 5.8 MUST be prohibited.
 638
 639        2) If a string contains any RandALCat character, the string MUST NOT
 640           contain any LCat character.
 641
 642        3) If a string contains any RandALCat character, a RandALCat
 643           character MUST be the first character of the string, and a
 644           RandALCat character MUST be the last character of the string.
 645 */
 646
 647 #define MAX_STACK_BUFFER_SIZE 300
 648
 649
 650 U_CAPI int32_t U_EXPORT2
 651 usprep_prepare(   const UStringPrepProfile* profile,
 652                   const UChar* src, int32_t srcLength,
 653                   UChar* dest, int32_t destCapacity,
 654                   int32_t options,
 655                   UParseError* parseError,
 656                   UErrorCode* status ){
 657
 658     // check error status
 659     if(status == NULL || U_FAILURE(*status)){
 660         return 0;
 661     }
 662
 663     //check arguments
 664     if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
 665         *status=U_ILLEGAL_ARGUMENT_ERROR;
 666         return 0;
 667     }
 668
 669     UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
 670     UChar *b1 = b1Stack, *b2 = b2Stack;
 671     int32_t b1Len, b2Len=0,
 672             b1Capacity = MAX_STACK_BUFFER_SIZE ,
 673             b2Capacity = MAX_STACK_BUFFER_SIZE;
 674     uint16_t result;
 675     int32_t b2Index = 0;
 676     UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
 677     UBool leftToRight=FALSE, rightToLeft=FALSE;
 678     int32_t rtlPos =-1, ltrPos =-1;
 679
 680     //get the string length
 681     if(srcLength == -1){
 682         srcLength = u_strlen(src);
 683     }
 684     // map
 685     b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
 686
 687     if(*status == U_BUFFER_OVERFLOW_ERROR){
 688         // redo processing of string
 689         /* we do not have enough room so grow the buffer*/
 690         b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
 691         if(b1==NULL){
 692             *status = U_MEMORY_ALLOCATION_ERROR;
 693             goto CLEANUP;
 694         }
 695
 696         *status = U_ZERO_ERROR; // reset error
 697
 698         b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
 699
 700     }
 701
 702     // normalize
 703     if(profile->doNFKC == TRUE){
 704         b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
 705
 706         if(*status == U_BUFFER_OVERFLOW_ERROR){
 707             // redo processing of string
 708             /* we do not have enough room so grow the buffer*/
 709             b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
 710             if(b2==NULL){
 711                 *status = U_MEMORY_ALLOCATION_ERROR;
 712                 goto CLEANUP;
 713             }
 714
 715             *status = U_ZERO_ERROR; // reset error
 716
 717             b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
 718
 719         }
 720
 721     }else{
 722         b2 = b1;
 723         b2Len = b1Len;
 724     }
 725
 726
 727     if(U_FAILURE(*status)){
 728         goto CLEANUP;
 729     }
 730
 731     UChar32 ch;
 732     UStringPrepType type;
 733     int16_t value;
 734     UBool isIndex;
 735
 736     // Prohibit and checkBiDi in one pass
 737     for(b2Index=0; b2Index<b2Len;){
 738
 739         ch = 0;
 740
 741         U16_NEXT(b2, b2Index, b2Len, ch);
 742
 743         UTRIE_GET16(&profile->sprepTrie,ch,result);
 744
 745         type = getValues(result, value, isIndex);
 746
 747         if( type == USPREP_PROHIBITED ||
 748             ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
 749            ){
 750             *status = U_STRINGPREP_PROHIBITED_ERROR;
 751             uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
 752             goto CLEANUP;
 753         }
 754
 755         direction = u_charDirection(ch);
 756         if(firstCharDir == U_CHAR_DIRECTION_COUNT){
 757             firstCharDir = direction;
 758         }
 759         if(direction == U_LEFT_TO_RIGHT){
 760             leftToRight = TRUE;
 761             ltrPos = b2Index-1;
 762         }
 763         if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
 764             rightToLeft = TRUE;
 765             rtlPos = b2Index-1;
 766         }
 767     }
 768     if(profile->checkBiDi == TRUE){
 769         // satisfy 2
 770         if( leftToRight == TRUE && rightToLeft == TRUE){
 771             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
 772             uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
 773             goto CLEANUP;
 774         }
 775
 776         //satisfy 3
 777         if( rightToLeft == TRUE &&
 778             !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
 779               (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
 780            ){
 781             *status = U_STRINGPREP_CHECK_BIDI_ERROR;
 782             uprv_syntaxError(b2, rtlPos, b2Len, parseError);
 783             return FALSE;
 784         }
 785     }
 786     if(b2Len <= destCapacity){
 787         uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
 788     }
 789
 790 CLEANUP:
 791     if(b1!=b1Stack){
 792         uprv_free(b1);
 793         b1=NULL;
 794     }
 795
 796     if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
 797         uprv_free(b2);
 798         b2=NULL;
 799     }
 800     return u_terminateUChars(dest, destCapacity, b2Len, status);
 801 }
 802
 803
 804 /* data swapping ------------------------------------------------------------ */
 805
 806 U_CAPI int32_t U_EXPORT2
 807 usprep_swap(const UDataSwapper *ds,
 808             const void *inData, int32_t length, void *outData,
 809             UErrorCode *pErrorCode) {
 810     const UDataInfo *pInfo;
 811     int32_t headerSize;
 812
 813     const uint8_t *inBytes;
 814     uint8_t *outBytes;
 815
 816     const int32_t *inIndexes;
 817     int32_t indexes[16];
 818
 819     int32_t i, offset, count, size;
 820
 821     /* udata_swapDataHeader checks the arguments */
 822     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
 823     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
 824         return 0;
 825     }
 826
 827     /* check data format and format version */
 828     pInfo=(const UDataInfo *)((const char *)inData+4);
 829     if(!(
 830         pInfo->dataFormat[0]==0x53 &&   /* dataFormat="SPRP" */
 831         pInfo->dataFormat[1]==0x50 &&
 832         pInfo->dataFormat[2]==0x52 &&
 833         pInfo->dataFormat[3]==0x50 &&
 834         pInfo->formatVersion[0]==3
 835     )) {
 836         udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
 837                          pInfo->dataFormat[0], pInfo->dataFormat[1],
 838                          pInfo->dataFormat[2], pInfo->dataFormat[3],
 839                          pInfo->formatVersion[0]);
 840         *pErrorCode=U_UNSUPPORTED_ERROR;
 841         return 0;
 842     }
 843
 844     inBytes=(const uint8_t *)inData+headerSize;
 845     outBytes=(uint8_t *)outData+headerSize;
 846
 847     inIndexes=(const int32_t *)inBytes;
 848
 849     if(length>=0) {
 850         length-=headerSize;
 851         if(length<16*4) {
 852             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
 853                              length);
 854             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 855             return 0;
 856         }
 857     }
 858
 859     /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
 860     for(i=0; i<16; ++i) {
 861         indexes[i]=udata_readInt32(ds, inIndexes[i]);
 862     }
 863
 864     /* calculate the total length of the data */
 865     size=
 866         16*4+ /* size of indexes[] */
 867         indexes[_SPREP_INDEX_TRIE_SIZE]+
 868         indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
 869
 870     if(length>=0) {
 871         if(length<size) {
 872             udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
 873                              length);
 874             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 875             return 0;
 876         }
 877
 878         /* copy the data for inaccessible bytes */
 879         if(inBytes!=outBytes) {
 880             uprv_memcpy(outBytes, inBytes, size);
 881         }
 882
 883         offset=0;
 884
 885         /* swap the int32_t indexes[] */
 886         count=16*4;
 887         ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
 888         offset+=count;
 889
 890         /* swap the UTrie */
 891         count=indexes[_SPREP_INDEX_TRIE_SIZE];
 892         utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
 893         offset+=count;
 894
 895         /* swap the uint16_t mappingTable[] */
 896         count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
 897         ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
 898         offset+=count;
 899     }
 900
 901     return headerSize+size;
 902 }
 903
 904 #endif /* #if !UCONFIG_NO_IDNA */