]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/usprep.cpp
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / common / usprep.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: usprep.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003jul2
16 * created by: Ram Viswanadha
17 */
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_IDNA
22
23 #include "unicode/usprep.h"
24
25 #include "unicode/normalizer2.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "unicode/uversion.h"
29 #include "umutex.h"
30 #include "cmemory.h"
31 #include "sprpimpl.h"
32 #include "ustr_imp.h"
33 #include "uhash.h"
34 #include "cstring.h"
35 #include "udataswp.h"
36 #include "ucln_cmn.h"
37 #include "ubidi_props.h"
38 #include "uprops.h"
39
40 U_NAMESPACE_USE
41
42 U_CDECL_BEGIN
43
44 /*
45 Static cache for already opened StringPrep profiles
46 */
47 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
48 static icu::UInitOnce gSharedDataInitOnce;
49
50 static UMutex usprepMutex = U_MUTEX_INITIALIZER;
51
52 /* format version of spp file */
53 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
54
55 /* the Unicode version of the sprep data */
56 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
57
58 /* Profile names must be aligned to UStringPrepProfileType */
59 static const char * const PROFILE_NAMES[] = {
60 "rfc3491", /* USPREP_RFC3491_NAMEPREP */
61 "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */
62 "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */
63 "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */
64 "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
65 "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
66 "rfc3722", /* USPREP_RFC3722_ISCSI */
67 "rfc3920node", /* USPREP_RFC3920_NODEPREP */
68 "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */
69 "rfc4011", /* USPREP_RFC4011_MIB */
70 "rfc4013", /* USPREP_RFC4013_SASLPREP */
71 "rfc4505", /* USPREP_RFC4505_TRACE */
72 "rfc4518", /* USPREP_RFC4518_LDAP */
73 "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */
74 };
75
76 static UBool U_CALLCONV
77 isSPrepAcceptable(void * /* context */,
78 const char * /* type */,
79 const char * /* name */,
80 const UDataInfo *pInfo) {
81 if(
82 pInfo->size>=20 &&
83 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
84 pInfo->charsetFamily==U_CHARSET_FAMILY &&
85 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
86 pInfo->dataFormat[1]==0x50 &&
87 pInfo->dataFormat[2]==0x52 &&
88 pInfo->dataFormat[3]==0x50 &&
89 pInfo->formatVersion[0]==3 &&
90 pInfo->formatVersion[2]==UTRIE_SHIFT &&
91 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
92 ) {
93 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
94 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
95 return TRUE;
96 } else {
97 return FALSE;
98 }
99 }
100
101 static int32_t U_CALLCONV
102 getSPrepFoldingOffset(uint32_t data) {
103
104 return (int32_t)data;
105
106 }
107
108 /* hashes an entry */
109 static int32_t U_CALLCONV
110 hashEntry(const UHashTok parm) {
111 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
112 UHashTok namekey, pathkey;
113 namekey.pointer = b->name;
114 pathkey.pointer = b->path;
115 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
116 }
117
118 /* compares two entries */
119 static UBool U_CALLCONV
120 compareEntries(const UHashTok p1, const UHashTok p2) {
121 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
122 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
123 UHashTok name1, name2, path1, path2;
124 name1.pointer = b1->name;
125 name2.pointer = b2->name;
126 path1.pointer = b1->path;
127 path2.pointer = b2->path;
128 return ((UBool)(uhash_compareChars(name1, name2) &
129 uhash_compareChars(path1, path2)));
130 }
131
132 static void
133 usprep_unload(UStringPrepProfile* data){
134 udata_close(data->sprepData);
135 }
136
137 static int32_t
138 usprep_internal_flushCache(UBool noRefCount){
139 UStringPrepProfile *profile = NULL;
140 UStringPrepKey *key = NULL;
141 int32_t pos = UHASH_FIRST;
142 int32_t deletedNum = 0;
143 const UHashElement *e;
144
145 /*
146 * if shared data hasn't even been lazy evaluated yet
147 * return 0
148 */
149 umtx_lock(&usprepMutex);
150 if (SHARED_DATA_HASHTABLE == NULL) {
151 umtx_unlock(&usprepMutex);
152 return 0;
153 }
154
155 /*creates an enumeration to iterate through every element in the table */
156 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
157 {
158 profile = (UStringPrepProfile *) e->value.pointer;
159 key = (UStringPrepKey *) e->key.pointer;
160
161 if ((noRefCount== FALSE && profile->refCount == 0) ||
162 noRefCount== TRUE) {
163 deletedNum++;
164 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
165
166 /* unload the data */
167 usprep_unload(profile);
168
169 if(key->name != NULL) {
170 uprv_free(key->name);
171 key->name=NULL;
172 }
173 if(key->path != NULL) {
174 uprv_free(key->path);
175 key->path=NULL;
176 }
177 uprv_free(profile);
178 uprv_free(key);
179 }
180
181 }
182 umtx_unlock(&usprepMutex);
183
184 return deletedNum;
185 }
186
187 /* Works just like ucnv_flushCache()
188 static int32_t
189 usprep_flushCache(){
190 return usprep_internal_flushCache(FALSE);
191 }
192 */
193
194 static UBool U_CALLCONV usprep_cleanup(void){
195 if (SHARED_DATA_HASHTABLE != NULL) {
196 usprep_internal_flushCache(TRUE);
197 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
198 uhash_close(SHARED_DATA_HASHTABLE);
199 SHARED_DATA_HASHTABLE = NULL;
200 }
201 }
202 gSharedDataInitOnce.reset();
203 return (SHARED_DATA_HASHTABLE == NULL);
204 }
205 U_CDECL_END
206
207
208 /** Initializes the cache for resources */
209 static void U_CALLCONV
210 createCache(UErrorCode &status) {
211 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
212 if (U_FAILURE(status)) {
213 SHARED_DATA_HASHTABLE = NULL;
214 }
215 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
216 }
217
218 static void
219 initCache(UErrorCode *status) {
220 umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
221 }
222
223 static UBool U_CALLCONV
224 loadData(UStringPrepProfile* profile,
225 const char* path,
226 const char* name,
227 const char* type,
228 UErrorCode* errorCode) {
229 /* load Unicode SPREP data from file */
230 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
231 UDataMemory *dataMemory;
232 const int32_t *p=NULL;
233 const uint8_t *pb;
234 UVersionInfo normUnicodeVersion;
235 int32_t normUniVer, sprepUniVer, normCorrVer;
236
237 if(errorCode==NULL || U_FAILURE(*errorCode)) {
238 return 0;
239 }
240
241 /* open the data outside the mutex block */
242 //TODO: change the path
243 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
244 if(U_FAILURE(*errorCode)) {
245 return FALSE;
246 }
247
248 p=(const int32_t *)udata_getMemory(dataMemory);
249 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
250 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
251 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
252
253
254 if(U_FAILURE(*errorCode)) {
255 udata_close(dataMemory);
256 return FALSE;
257 }
258
259 /* in the mutex block, set the data for this process */
260 umtx_lock(&usprepMutex);
261 if(profile->sprepData==NULL) {
262 profile->sprepData=dataMemory;
263 dataMemory=NULL;
264 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
265 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
266 } else {
267 p=(const int32_t *)udata_getMemory(profile->sprepData);
268 }
269 umtx_unlock(&usprepMutex);
270 /* initialize some variables */
271 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
272
273 u_getUnicodeVersion(normUnicodeVersion);
274 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
275 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
276 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
277 (dataVersion[2] << 8 ) + (dataVersion[3]);
278 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
279
280 if(U_FAILURE(*errorCode)){
281 udata_close(dataMemory);
282 return FALSE;
283 }
284 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
285 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
286 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
287 ){
288 *errorCode = U_INVALID_FORMAT_ERROR;
289 udata_close(dataMemory);
290 return FALSE;
291 }
292 profile->isDataLoaded = TRUE;
293
294 /* if a different thread set it first, then close the extra data */
295 if(dataMemory!=NULL) {
296 udata_close(dataMemory); /* NULL if it was set correctly */
297 }
298
299
300 return profile->isDataLoaded;
301 }
302
303 static UStringPrepProfile*
304 usprep_getProfile(const char* path,
305 const char* name,
306 UErrorCode *status){
307
308 UStringPrepProfile* profile = NULL;
309
310 initCache(status);
311
312 if(U_FAILURE(*status)){
313 return NULL;
314 }
315
316 UStringPrepKey stackKey;
317 /*
318 * const is cast way to save malloc, strcpy and free calls
319 * we use the passed in pointers for fetching the data from the
320 * hash table which is safe
321 */
322 stackKey.name = (char*) name;
323 stackKey.path = (char*) path;
324
325 /* fetch the data from the cache */
326 umtx_lock(&usprepMutex);
327 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
328 if(profile != NULL) {
329 profile->refCount++;
330 }
331 umtx_unlock(&usprepMutex);
332
333 if(profile == NULL) {
334 /* else load the data and put the data in the cache */
335 LocalMemory<UStringPrepProfile> newProfile;
336 if(newProfile.allocateInsteadAndReset() == NULL) {
337 *status = U_MEMORY_ALLOCATION_ERROR;
338 return NULL;
339 }
340
341 /* load the data */
342 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
343 return NULL;
344 }
345
346 /* get the options */
347 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
348 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
349
350 LocalMemory<UStringPrepKey> key;
351 LocalMemory<char> keyName;
352 LocalMemory<char> keyPath;
353 if( key.allocateInsteadAndReset() == NULL ||
354 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
355 (path != NULL &&
356 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
357 ) {
358 *status = U_MEMORY_ALLOCATION_ERROR;
359 usprep_unload(newProfile.getAlias());
360 return NULL;
361 }
362
363 umtx_lock(&usprepMutex);
364 // If another thread already inserted the same key/value, refcount and cleanup our thread data
365 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
366 if(profile != NULL) {
367 profile->refCount++;
368 usprep_unload(newProfile.getAlias());
369 }
370 else {
371 /* initialize the key members */
372 key->name = keyName.orphan();
373 uprv_strcpy(key->name, name);
374 if(path != NULL){
375 key->path = keyPath.orphan();
376 uprv_strcpy(key->path, path);
377 }
378 profile = newProfile.orphan();
379
380 /* add the data object to the cache */
381 profile->refCount = 1;
382 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
383 }
384 umtx_unlock(&usprepMutex);
385 }
386
387 return profile;
388 }
389
390 U_CAPI UStringPrepProfile* U_EXPORT2
391 usprep_open(const char* path,
392 const char* name,
393 UErrorCode* status){
394
395 if(status == NULL || U_FAILURE(*status)){
396 return NULL;
397 }
398
399 /* initialize the profile struct members */
400 return usprep_getProfile(path,name,status);
401 }
402
403 U_CAPI UStringPrepProfile* U_EXPORT2
404 usprep_openByType(UStringPrepProfileType type,
405 UErrorCode* status) {
406 if(status == NULL || U_FAILURE(*status)){
407 return NULL;
408 }
409 int32_t index = (int32_t)type;
410 if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
411 *status = U_ILLEGAL_ARGUMENT_ERROR;
412 return NULL;
413 }
414 return usprep_open(NULL, PROFILE_NAMES[index], status);
415 }
416
417 U_CAPI void U_EXPORT2
418 usprep_close(UStringPrepProfile* profile){
419 if(profile==NULL){
420 return;
421 }
422
423 umtx_lock(&usprepMutex);
424 /* decrement the ref count*/
425 if(profile->refCount > 0){
426 profile->refCount--;
427 }
428 umtx_unlock(&usprepMutex);
429
430 }
431
432 U_CFUNC void
433 uprv_syntaxError(const UChar* rules,
434 int32_t pos,
435 int32_t rulesLen,
436 UParseError* parseError){
437 if(parseError == NULL){
438 return;
439 }
440 parseError->offset = pos;
441 parseError->line = 0 ; // we are not using line numbers
442
443 // for pre-context
444 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
445 int32_t limit = pos;
446
447 u_memcpy(parseError->preContext,rules+start,limit-start);
448 //null terminate the buffer
449 parseError->preContext[limit-start] = 0;
450
451 // for post-context; include error rules[pos]
452 start = pos;
453 limit = start + (U_PARSE_CONTEXT_LEN-1);
454 if (limit > rulesLen) {
455 limit = rulesLen;
456 }
457 if (start < rulesLen) {
458 u_memcpy(parseError->postContext,rules+start,limit-start);
459 }
460 //null terminate the buffer
461 parseError->postContext[limit-start]= 0;
462 }
463
464
465 static inline UStringPrepType
466 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
467
468 UStringPrepType type;
469 if(trieWord == 0){
470 /*
471 * Initial value stored in the mapping table
472 * just return USPREP_TYPE_LIMIT .. so that
473 * the source codepoint is copied to the destination
474 */
475 type = USPREP_TYPE_LIMIT;
476 isIndex =FALSE;
477 value = 0;
478 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
479 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
480 isIndex =FALSE;
481 value = 0;
482 }else{
483 /* get the type */
484 type = USPREP_MAP;
485 /* ascertain if the value is index or delta */
486 if(trieWord & 0x02){
487 isIndex = TRUE;
488 value = trieWord >> 2; //mask off the lower 2 bits and shift
489 }else{
490 isIndex = FALSE;
491 value = (int16_t)trieWord;
492 value = (value >> 2);
493 }
494
495 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
496 type = USPREP_DELETE;
497 isIndex =FALSE;
498 value = 0;
499 }
500 }
501 return type;
502 }
503
504 // TODO: change to writing to UnicodeString not UChar *
505 static int32_t
506 usprep_map( const UStringPrepProfile* profile,
507 const UChar* src, int32_t srcLength,
508 UChar* dest, int32_t destCapacity,
509 int32_t options,
510 UParseError* parseError,
511 UErrorCode* status ){
512
513 uint16_t result;
514 int32_t destIndex=0;
515 int32_t srcIndex;
516 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
517 UStringPrepType type;
518 int16_t value;
519 UBool isIndex;
520 const int32_t* indexes = profile->indexes;
521
522 // no error checking the caller check for error and arguments
523 // no string length check the caller finds out the string length
524
525 for(srcIndex=0;srcIndex<srcLength;){
526 UChar32 ch;
527
528 U16_NEXT(src,srcIndex,srcLength,ch);
529
530 result=0;
531
532 UTRIE_GET16(&profile->sprepTrie,ch,result);
533
534 type = getValues(result, value, isIndex);
535
536 // check if the source codepoint is unassigned
537 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
538
539 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
540 *status = U_STRINGPREP_UNASSIGNED_ERROR;
541 return 0;
542
543 }else if(type == USPREP_MAP){
544
545 int32_t index, length;
546
547 if(isIndex){
548 index = value;
549 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
550 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
551 length = 1;
552 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
553 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
554 length = 2;
555 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
556 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
557 length = 3;
558 }else{
559 length = profile->mappingData[index++];
560
561 }
562
563 /* copy mapping to destination */
564 for(int32_t i=0; i< length; i++){
565 if(destIndex < destCapacity ){
566 dest[destIndex] = profile->mappingData[index+i];
567 }
568 destIndex++; /* for pre-flighting */
569 }
570 continue;
571 }else{
572 // subtract the delta to arrive at the code point
573 ch -= value;
574 }
575
576 }else if(type==USPREP_DELETE){
577 // just consume the codepoint and contine
578 continue;
579 }
580 //copy the code point into destination
581 if(ch <= 0xFFFF){
582 if(destIndex < destCapacity ){
583 dest[destIndex] = (UChar)ch;
584 }
585 destIndex++;
586 }else{
587 if(destIndex+1 < destCapacity ){
588 dest[destIndex] = U16_LEAD(ch);
589 dest[destIndex+1] = U16_TRAIL(ch);
590 }
591 destIndex +=2;
592 }
593
594 }
595
596 return u_terminateUChars(dest, destCapacity, destIndex, status);
597 }
598
599 /*
600 1) Map -- For each character in the input, check if it has a mapping
601 and, if so, replace it with its mapping.
602
603 2) Normalize -- Possibly normalize the result of step 1 using Unicode
604 normalization.
605
606 3) Prohibit -- Check for any characters that are not allowed in the
607 output. If any are found, return an error.
608
609 4) Check bidi -- Possibly check for right-to-left characters, and if
610 any are found, make sure that the whole string satisfies the
611 requirements for bidirectional strings. If the string does not
612 satisfy the requirements for bidirectional strings, return an
613 error.
614 [Unicode3.2] defines several bidirectional categories; each character
615 has one bidirectional category assigned to it. For the purposes of
616 the requirements below, an "RandALCat character" is a character that
617 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
618 is a character that has Unicode bidirectional category "L". Note
619
620
621 that there are many characters which fall in neither of the above
622 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
623 this because they have bidirectional category "EN".
624
625 In any profile that specifies bidirectional character handling, all
626 three of the following requirements MUST be met:
627
628 1) The characters in section 5.8 MUST be prohibited.
629
630 2) If a string contains any RandALCat character, the string MUST NOT
631 contain any LCat character.
632
633 3) If a string contains any RandALCat character, a RandALCat
634 character MUST be the first character of the string, and a
635 RandALCat character MUST be the last character of the string.
636 */
637 U_CAPI int32_t U_EXPORT2
638 usprep_prepare( const UStringPrepProfile* profile,
639 const UChar* src, int32_t srcLength,
640 UChar* dest, int32_t destCapacity,
641 int32_t options,
642 UParseError* parseError,
643 UErrorCode* status ){
644
645 // check error status
646 if(U_FAILURE(*status)){
647 return 0;
648 }
649
650 //check arguments
651 if(profile==NULL ||
652 (src==NULL ? srcLength!=0 : srcLength<-1) ||
653 (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
654 *status=U_ILLEGAL_ARGUMENT_ERROR;
655 return 0;
656 }
657
658 //get the string length
659 if(srcLength < 0){
660 srcLength = u_strlen(src);
661 }
662 // map
663 UnicodeString s1;
664 UChar *b1 = s1.getBuffer(srcLength);
665 if(b1==NULL){
666 *status = U_MEMORY_ALLOCATION_ERROR;
667 return 0;
668 }
669 int32_t b1Len = usprep_map(profile, src, srcLength,
670 b1, s1.getCapacity(), options, parseError, status);
671 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
672
673 if(*status == U_BUFFER_OVERFLOW_ERROR){
674 // redo processing of string
675 /* we do not have enough room so grow the buffer*/
676 b1 = s1.getBuffer(b1Len);
677 if(b1==NULL){
678 *status = U_MEMORY_ALLOCATION_ERROR;
679 return 0;
680 }
681
682 *status = U_ZERO_ERROR; // reset error
683 b1Len = usprep_map(profile, src, srcLength,
684 b1, s1.getCapacity(), options, parseError, status);
685 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
686 }
687 if(U_FAILURE(*status)){
688 return 0;
689 }
690
691 // normalize
692 UnicodeString s2;
693 if(profile->doNFKC){
694 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
695 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
696 if(U_FAILURE(*status)){
697 return 0;
698 }
699 fn2.normalize(s1, s2, *status);
700 }else{
701 s2.fastCopyFrom(s1);
702 }
703 if(U_FAILURE(*status)){
704 return 0;
705 }
706
707 // Prohibit and checkBiDi in one pass
708 const UChar *b2 = s2.getBuffer();
709 int32_t b2Len = s2.length();
710 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
711 UBool leftToRight=FALSE, rightToLeft=FALSE;
712 int32_t rtlPos =-1, ltrPos =-1;
713
714 for(int32_t b2Index=0; b2Index<b2Len;){
715 UChar32 ch = 0;
716 U16_NEXT(b2, b2Index, b2Len, ch);
717
718 uint16_t result;
719 UTRIE_GET16(&profile->sprepTrie,ch,result);
720
721 int16_t value;
722 UBool isIndex;
723 UStringPrepType type = getValues(result, value, isIndex);
724
725 if( type == USPREP_PROHIBITED ||
726 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
727 ){
728 *status = U_STRINGPREP_PROHIBITED_ERROR;
729 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
730 return 0;
731 }
732
733 if(profile->checkBiDi) {
734 direction = ubidi_getClass(ch);
735 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
736 firstCharDir = direction;
737 }
738 if(direction == U_LEFT_TO_RIGHT){
739 leftToRight = TRUE;
740 ltrPos = b2Index-1;
741 }
742 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
743 rightToLeft = TRUE;
744 rtlPos = b2Index-1;
745 }
746 }
747 }
748 if(profile->checkBiDi == TRUE){
749 // satisfy 2
750 if( leftToRight == TRUE && rightToLeft == TRUE){
751 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
752 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
753 return 0;
754 }
755
756 //satisfy 3
757 if( rightToLeft == TRUE &&
758 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
759 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
760 ){
761 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
762 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
763 return FALSE;
764 }
765 }
766 return s2.extract(dest, destCapacity, *status);
767 }
768
769
770 /* data swapping ------------------------------------------------------------ */
771
772 U_CAPI int32_t U_EXPORT2
773 usprep_swap(const UDataSwapper *ds,
774 const void *inData, int32_t length, void *outData,
775 UErrorCode *pErrorCode) {
776 const UDataInfo *pInfo;
777 int32_t headerSize;
778
779 const uint8_t *inBytes;
780 uint8_t *outBytes;
781
782 const int32_t *inIndexes;
783 int32_t indexes[16];
784
785 int32_t i, offset, count, size;
786
787 /* udata_swapDataHeader checks the arguments */
788 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
789 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
790 return 0;
791 }
792
793 /* check data format and format version */
794 pInfo=(const UDataInfo *)((const char *)inData+4);
795 if(!(
796 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
797 pInfo->dataFormat[1]==0x50 &&
798 pInfo->dataFormat[2]==0x52 &&
799 pInfo->dataFormat[3]==0x50 &&
800 pInfo->formatVersion[0]==3
801 )) {
802 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
803 pInfo->dataFormat[0], pInfo->dataFormat[1],
804 pInfo->dataFormat[2], pInfo->dataFormat[3],
805 pInfo->formatVersion[0]);
806 *pErrorCode=U_UNSUPPORTED_ERROR;
807 return 0;
808 }
809
810 inBytes=(const uint8_t *)inData+headerSize;
811 outBytes=(uint8_t *)outData+headerSize;
812
813 inIndexes=(const int32_t *)inBytes;
814
815 if(length>=0) {
816 length-=headerSize;
817 if(length<16*4) {
818 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
819 length);
820 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
821 return 0;
822 }
823 }
824
825 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
826 for(i=0; i<16; ++i) {
827 indexes[i]=udata_readInt32(ds, inIndexes[i]);
828 }
829
830 /* calculate the total length of the data */
831 size=
832 16*4+ /* size of indexes[] */
833 indexes[_SPREP_INDEX_TRIE_SIZE]+
834 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
835
836 if(length>=0) {
837 if(length<size) {
838 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
839 length);
840 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
841 return 0;
842 }
843
844 /* copy the data for inaccessible bytes */
845 if(inBytes!=outBytes) {
846 uprv_memcpy(outBytes, inBytes, size);
847 }
848
849 offset=0;
850
851 /* swap the int32_t indexes[] */
852 count=16*4;
853 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
854 offset+=count;
855
856 /* swap the UTrie */
857 count=indexes[_SPREP_INDEX_TRIE_SIZE];
858 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
859 offset+=count;
860
861 /* swap the uint16_t mappingTable[] */
862 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
863 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
864 //offset+=count;
865 }
866
867 return headerSize+size;
868 }
869
870 #endif /* #if !UCONFIG_NO_IDNA */