]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/usprep.cpp
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / common / usprep.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: usprep.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003jul2
14 * created by: Ram Viswanadha
15 */
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_IDNA
20
21 #include "unicode/usprep.h"
22
23 #include "unicode/unorm.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h"
26 #include "unicode/uversion.h"
27 #include "umutex.h"
28 #include "cmemory.h"
29 #include "sprpimpl.h"
30 #include "ustr_imp.h"
31 #include "uhash.h"
32 #include "cstring.h"
33 #include "udataswp.h"
34 #include "ucln_cmn.h"
35 #include "unormimp.h"
36
37 U_CDECL_BEGIN
38
39 /*
40 Static cache for already opened StringPrep profiles
41 */
42 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
43
44 static UMTX usprepMutex = NULL;
45
46 /* format version of spp file */
47 static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
48
49 /* the Unicode version of the sprep data */
50 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
51
52 static UBool U_CALLCONV
53 isSPrepAcceptable(void * /* context */,
54 const char * /* type */,
55 const char * /* name */,
56 const UDataInfo *pInfo) {
57 if(
58 pInfo->size>=20 &&
59 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
60 pInfo->charsetFamily==U_CHARSET_FAMILY &&
61 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
62 pInfo->dataFormat[1]==0x50 &&
63 pInfo->dataFormat[2]==0x52 &&
64 pInfo->dataFormat[3]==0x50 &&
65 pInfo->formatVersion[0]==3 &&
66 pInfo->formatVersion[2]==UTRIE_SHIFT &&
67 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
68 ) {
69 uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
70 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
71 return TRUE;
72 } else {
73 return FALSE;
74 }
75 }
76
77 static int32_t U_CALLCONV
78 getSPrepFoldingOffset(uint32_t data) {
79
80 return (int32_t)data;
81
82 }
83
84 /* hashes an entry */
85 static int32_t U_EXPORT2 U_CALLCONV
86 hashEntry(const UHashTok parm) {
87 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
88 UHashTok namekey, pathkey;
89 namekey.pointer = b->name;
90 pathkey.pointer = b->path;
91 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
92 }
93
94 /* compares two entries */
95 static UBool U_EXPORT2 U_CALLCONV
96 compareEntries(const UHashTok p1, const UHashTok p2) {
97 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
98 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
99 UHashTok name1, name2, path1, path2;
100 name1.pointer = b1->name;
101 name2.pointer = b2->name;
102 path1.pointer = b1->path;
103 path2.pointer = b2->path;
104 return ((UBool)(uhash_compareChars(name1, name2) &
105 uhash_compareChars(path1, path2)));
106 }
107
108 static void
109 usprep_unload(UStringPrepProfile* data){
110 udata_close(data->sprepData);
111 }
112
113 static int32_t
114 usprep_internal_flushCache(UBool noRefCount){
115 UStringPrepProfile *profile = NULL;
116 UStringPrepKey *key = NULL;
117 int32_t pos = -1;
118 int32_t deletedNum = 0;
119 const UHashElement *e;
120
121 /*
122 * if shared data hasn't even been lazy evaluated yet
123 * return 0
124 */
125 umtx_lock(&usprepMutex);
126 if (SHARED_DATA_HASHTABLE == NULL) {
127 umtx_unlock(&usprepMutex);
128 return 0;
129 }
130
131 /*creates an enumeration to iterate through every element in the table */
132 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
133 {
134 profile = (UStringPrepProfile *) e->value.pointer;
135 key = (UStringPrepKey *) e->key.pointer;
136
137 if ((noRefCount== FALSE && profile->refCount == 0) ||
138 noRefCount== TRUE) {
139 deletedNum++;
140 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
141
142 /* unload the data */
143 usprep_unload(profile);
144
145 if(key->name != NULL) {
146 uprv_free(key->name);
147 key->name=NULL;
148 }
149 if(key->path != NULL) {
150 uprv_free(key->path);
151 key->path=NULL;
152 }
153 uprv_free(profile);
154 uprv_free(key);
155 }
156
157 }
158 umtx_unlock(&usprepMutex);
159
160 return deletedNum;
161 }
162
163 /* Works just like ucnv_flushCache()
164 static int32_t
165 usprep_flushCache(){
166 return usprep_internal_flushCache(FALSE);
167 }
168 */
169
170 static UBool U_CALLCONV usprep_cleanup(void){
171 if (SHARED_DATA_HASHTABLE != NULL) {
172 usprep_internal_flushCache(TRUE);
173 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
174 uhash_close(SHARED_DATA_HASHTABLE);
175 SHARED_DATA_HASHTABLE = NULL;
176 }
177 }
178
179 umtx_destroy(&usprepMutex); /* Don't worry about destroying the mutex even */
180 /* if the hash table still exists. The mutex */
181 /* will lazily re-init itself if needed. */
182 return (SHARED_DATA_HASHTABLE == NULL);
183 }
184 U_CDECL_END
185
186 static void
187 usprep_init() {
188 umtx_init(&usprepMutex);
189 }
190
191 /** Initializes the cache for resources */
192 static void
193 initCache(UErrorCode *status) {
194 UBool makeCache = FALSE;
195 umtx_lock(&usprepMutex);
196 makeCache = (SHARED_DATA_HASHTABLE == NULL);
197 umtx_unlock(&usprepMutex);
198 if(makeCache) {
199 UHashtable *newCache = uhash_open(hashEntry, compareEntries, status);
200 if (U_FAILURE(*status)) {
201 return;
202 }
203 umtx_lock(&usprepMutex);
204 if(SHARED_DATA_HASHTABLE == NULL) {
205 SHARED_DATA_HASHTABLE = newCache;
206 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
207 newCache = NULL;
208 }
209 umtx_unlock(&usprepMutex);
210 if(newCache != NULL) {
211 uhash_close(newCache);
212 }
213 }
214 }
215
216 static UBool U_CALLCONV
217 loadData(UStringPrepProfile* profile,
218 const char* path,
219 const char* name,
220 const char* type,
221 UErrorCode* errorCode) {
222 /* load Unicode SPREP data from file */
223 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
224 UDataMemory *dataMemory;
225 const int32_t *p=NULL;
226 const uint8_t *pb;
227 UVersionInfo normUnicodeVersion;
228 int32_t normUniVer, sprepUniVer, normCorrVer;
229
230 if(errorCode==NULL || U_FAILURE(*errorCode)) {
231 return 0;
232 }
233
234 /* open the data outside the mutex block */
235 //TODO: change the path
236 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
237 if(U_FAILURE(*errorCode)) {
238 return FALSE;
239 }
240
241 p=(const int32_t *)udata_getMemory(dataMemory);
242 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
243 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
244 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
245
246
247 if(U_FAILURE(*errorCode)) {
248 udata_close(dataMemory);
249 return FALSE;
250 }
251
252 /* in the mutex block, set the data for this process */
253 umtx_lock(&usprepMutex);
254 if(profile->sprepData==NULL) {
255 profile->sprepData=dataMemory;
256 dataMemory=NULL;
257 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
258 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
259 } else {
260 p=(const int32_t *)udata_getMemory(profile->sprepData);
261 }
262 umtx_unlock(&usprepMutex);
263 /* initialize some variables */
264 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
265
266 unorm_getUnicodeVersion(&normUnicodeVersion, errorCode);
267 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
268 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
269 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
270 (dataVersion[2] << 8 ) + (dataVersion[3]);
271 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
272
273 if(U_FAILURE(*errorCode)){
274 udata_close(dataMemory);
275 return FALSE;
276 }
277 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
278 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
279 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
280 ){
281 *errorCode = U_INVALID_FORMAT_ERROR;
282 udata_close(dataMemory);
283 return FALSE;
284 }
285 profile->isDataLoaded = TRUE;
286
287 /* if a different thread set it first, then close the extra data */
288 if(dataMemory!=NULL) {
289 udata_close(dataMemory); /* NULL if it was set correctly */
290 }
291
292
293 return profile->isDataLoaded;
294 }
295
296 static UStringPrepProfile*
297 usprep_getProfile(const char* path,
298 const char* name,
299 UErrorCode *status){
300
301 UStringPrepProfile* profile = NULL;
302
303 initCache(status);
304
305 if(U_FAILURE(*status)){
306 return NULL;
307 }
308
309 UStringPrepKey stackKey;
310 /*
311 * const is cast way to save malloc, strcpy and free calls
312 * we use the passed in pointers for fetching the data from the
313 * hash table which is safe
314 */
315 stackKey.name = (char*) name;
316 stackKey.path = (char*) path;
317
318 /* fetch the data from the cache */
319 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
320
321 if(profile == NULL){
322 UStringPrepKey* key = (UStringPrepKey*) uprv_malloc(sizeof(UStringPrepKey));
323 if(key == NULL){
324 *status = U_MEMORY_ALLOCATION_ERROR;
325 return NULL;
326 }
327 /* else load the data and put the data in the cache */
328 profile = (UStringPrepProfile*) uprv_malloc(sizeof(UStringPrepProfile));
329 if(profile == NULL){
330 *status = U_MEMORY_ALLOCATION_ERROR;
331 uprv_free(key);
332 return NULL;
333 }
334
335 /* initialize the data struct members */
336 uprv_memset(profile->indexes,0,sizeof(profile->indexes));
337 profile->mappingData = NULL;
338 profile->sprepData = NULL;
339 profile->refCount = 0;
340
341 /* initialize the key memebers */
342 key->name = (char*) uprv_malloc(uprv_strlen(name)+1);
343 if(key->name == NULL){
344 *status = U_MEMORY_ALLOCATION_ERROR;
345 uprv_free(key);
346 uprv_free(profile);
347 return NULL;
348 }
349
350 uprv_strcpy(key->name, name);
351
352 key->path=NULL;
353
354 if(path != NULL){
355 key->path = (char*) uprv_malloc(uprv_strlen(path)+1);
356 if(key->path == NULL){
357 *status = U_MEMORY_ALLOCATION_ERROR;
358 uprv_free(key->path);
359 uprv_free(key);
360 uprv_free(profile);
361 return NULL;
362 }
363 uprv_strcpy(key->path, path);
364 }
365
366 /* load the data */
367 if(!loadData(profile, path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
368 return NULL;
369 }
370
371 /* get the options */
372 profile->doNFKC = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
373 profile->checkBiDi = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
374
375 umtx_lock(&usprepMutex);
376 /* add the data object to the cache */
377 uhash_put(SHARED_DATA_HASHTABLE, key, profile, status);
378 umtx_unlock(&usprepMutex);
379 }
380 umtx_lock(&usprepMutex);
381 /* increment the refcount */
382 profile->refCount++;
383 umtx_unlock(&usprepMutex);
384
385 return profile;
386 }
387
388 U_CAPI UStringPrepProfile* U_EXPORT2
389 usprep_open(const char* path,
390 const char* name,
391 UErrorCode* status){
392
393 if(status == NULL || U_FAILURE(*status)){
394 return NULL;
395 }
396 /* initialize the mutex */
397 usprep_init();
398
399 /* initialize the profile struct members */
400 return usprep_getProfile(path,name,status);;
401 }
402
403 U_CAPI void U_EXPORT2
404 usprep_close(UStringPrepProfile* profile){
405 if(profile==NULL){
406 return;
407 }
408
409 umtx_lock(&usprepMutex);
410 /* decrement the ref count*/
411 if(profile->refCount > 0){
412 profile->refCount--;
413 }
414 umtx_unlock(&usprepMutex);
415
416 }
417
418 U_CFUNC void
419 uprv_syntaxError(const UChar* rules,
420 int32_t pos,
421 int32_t rulesLen,
422 UParseError* parseError){
423 if(parseError == NULL){
424 return;
425 }
426 parseError->offset = pos;
427 parseError->line = 0 ; // we are not using line numbers
428
429 // for pre-context
430 int32_t start = (pos <=U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
431 int32_t limit = pos;
432
433 u_memcpy(parseError->preContext,rules+start,limit-start);
434 //null terminate the buffer
435 parseError->preContext[limit-start] = 0;
436
437 // for post-context; include error rules[pos]
438 start = pos;
439 limit = start + (U_PARSE_CONTEXT_LEN-1);
440 if (limit > rulesLen) {
441 limit = rulesLen;
442 }
443 if (start < rulesLen) {
444 u_memcpy(parseError->postContext,rules+start,limit-start);
445 }
446 //null terminate the buffer
447 parseError->postContext[limit-start]= 0;
448 }
449
450
451 static inline UStringPrepType
452 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
453
454 UStringPrepType type;
455 if(trieWord == 0){
456 /*
457 * Initial value stored in the mapping table
458 * just return USPREP_TYPE_LIMIT .. so that
459 * the source codepoint is copied to the destination
460 */
461 type = USPREP_TYPE_LIMIT;
462 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
463 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
464 }else{
465 /* get the type */
466 type = USPREP_MAP;
467 /* ascertain if the value is index or delta */
468 if(trieWord & 0x02){
469 isIndex = TRUE;
470 value = trieWord >> 2; //mask off the lower 2 bits and shift
471
472 }else{
473 isIndex = FALSE;
474 value = (int16_t)trieWord;
475 value = (value >> 2);
476
477 }
478
479 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
480 type = USPREP_DELETE;
481 isIndex =FALSE;
482 value = 0;
483 }
484 }
485 return type;
486 }
487
488
489
490 static int32_t
491 usprep_map( const UStringPrepProfile* profile,
492 const UChar* src, int32_t srcLength,
493 UChar* dest, int32_t destCapacity,
494 int32_t options,
495 UParseError* parseError,
496 UErrorCode* status ){
497
498 uint16_t result;
499 int32_t destIndex=0;
500 int32_t srcIndex;
501 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
502 UStringPrepType type;
503 int16_t value;
504 UBool isIndex;
505 const int32_t* indexes = profile->indexes;
506
507 // no error checking the caller check for error and arguments
508 // no string length check the caller finds out the string length
509
510 for(srcIndex=0;srcIndex<srcLength;){
511 UChar32 ch;
512
513 U16_NEXT(src,srcIndex,srcLength,ch);
514
515 result=0;
516
517 UTRIE_GET16(&profile->sprepTrie,ch,result);
518
519 type = getValues(result, value, isIndex);
520
521 // check if the source codepoint is unassigned
522 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
523
524 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
525 *status = U_STRINGPREP_UNASSIGNED_ERROR;
526 return 0;
527
528 }else if(type == USPREP_MAP){
529
530 int32_t index, length;
531
532 if(isIndex){
533 index = value;
534 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
535 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
536 length = 1;
537 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
538 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
539 length = 2;
540 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
541 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
542 length = 3;
543 }else{
544 length = profile->mappingData[index++];
545
546 }
547
548 /* copy mapping to destination */
549 for(int32_t i=0; i< length; i++){
550 if(destIndex < destCapacity ){
551 dest[destIndex] = profile->mappingData[index+i];
552 }
553 destIndex++; /* for pre-flighting */
554 }
555 continue;
556 }else{
557 // subtract the delta to arrive at the code point
558 ch -= value;
559 }
560
561 }else if(type==USPREP_DELETE){
562 // just consume the codepoint and contine
563 continue;
564 }
565 //copy the code point into destination
566 if(ch <= 0xFFFF){
567 if(destIndex < destCapacity ){
568 dest[destIndex] = (UChar)ch;
569 }
570 destIndex++;
571 }else{
572 if(destIndex+1 < destCapacity ){
573 dest[destIndex] = U16_LEAD(ch);
574 dest[destIndex+1] = U16_TRAIL(ch);
575 }
576 destIndex +=2;
577 }
578
579 }
580
581 return u_terminateUChars(dest, destCapacity, destIndex, status);
582 }
583
584
585 static int32_t
586 usprep_normalize( const UChar* src, int32_t srcLength,
587 UChar* dest, int32_t destCapacity,
588 UErrorCode* status ){
589 /*
590 * Option UNORM_BEFORE_PRI_29:
591 *
592 * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
593 * requires strict adherence to Unicode 3.2 normalization,
594 * including buggy composition from before fixing Public Review Issue #29.
595 * Note that this results in some valid but nonsensical text to be
596 * either corrupted or rejected, depending on the text.
597 * See http://www.unicode.org/review/resolved-pri.html#pri29
598 * See unorm.cpp and cnormtst.c
599 */
600 return unorm_normalize(
601 src, srcLength,
602 UNORM_NFKC, UNORM_UNICODE_3_2|UNORM_BEFORE_PRI_29,
603 dest, destCapacity,
604 status);
605 }
606
607
608 /*
609 1) Map -- For each character in the input, check if it has a mapping
610 and, if so, replace it with its mapping.
611
612 2) Normalize -- Possibly normalize the result of step 1 using Unicode
613 normalization.
614
615 3) Prohibit -- Check for any characters that are not allowed in the
616 output. If any are found, return an error.
617
618 4) Check bidi -- Possibly check for right-to-left characters, and if
619 any are found, make sure that the whole string satisfies the
620 requirements for bidirectional strings. If the string does not
621 satisfy the requirements for bidirectional strings, return an
622 error.
623 [Unicode3.2] defines several bidirectional categories; each character
624 has one bidirectional category assigned to it. For the purposes of
625 the requirements below, an "RandALCat character" is a character that
626 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
627 is a character that has Unicode bidirectional category "L". Note
628
629
630 that there are many characters which fall in neither of the above
631 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
632 this because they have bidirectional category "EN".
633
634 In any profile that specifies bidirectional character handling, all
635 three of the following requirements MUST be met:
636
637 1) The characters in section 5.8 MUST be prohibited.
638
639 2) If a string contains any RandALCat character, the string MUST NOT
640 contain any LCat character.
641
642 3) If a string contains any RandALCat character, a RandALCat
643 character MUST be the first character of the string, and a
644 RandALCat character MUST be the last character of the string.
645 */
646
647 #define MAX_STACK_BUFFER_SIZE 300
648
649
650 U_CAPI int32_t U_EXPORT2
651 usprep_prepare( const UStringPrepProfile* profile,
652 const UChar* src, int32_t srcLength,
653 UChar* dest, int32_t destCapacity,
654 int32_t options,
655 UParseError* parseError,
656 UErrorCode* status ){
657
658 // check error status
659 if(status == NULL || U_FAILURE(*status)){
660 return 0;
661 }
662
663 //check arguments
664 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
665 *status=U_ILLEGAL_ARGUMENT_ERROR;
666 return 0;
667 }
668
669 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
670 UChar *b1 = b1Stack, *b2 = b2Stack;
671 int32_t b1Len, b2Len=0,
672 b1Capacity = MAX_STACK_BUFFER_SIZE ,
673 b2Capacity = MAX_STACK_BUFFER_SIZE;
674 uint16_t result;
675 int32_t b2Index = 0;
676 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
677 UBool leftToRight=FALSE, rightToLeft=FALSE;
678 int32_t rtlPos =-1, ltrPos =-1;
679
680 //get the string length
681 if(srcLength == -1){
682 srcLength = u_strlen(src);
683 }
684 // map
685 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
686
687 if(*status == U_BUFFER_OVERFLOW_ERROR){
688 // redo processing of string
689 /* we do not have enough room so grow the buffer*/
690 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
691 if(b1==NULL){
692 *status = U_MEMORY_ALLOCATION_ERROR;
693 goto CLEANUP;
694 }
695
696 *status = U_ZERO_ERROR; // reset error
697
698 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
699
700 }
701
702 // normalize
703 if(profile->doNFKC == TRUE){
704 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
705
706 if(*status == U_BUFFER_OVERFLOW_ERROR){
707 // redo processing of string
708 /* we do not have enough room so grow the buffer*/
709 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
710 if(b2==NULL){
711 *status = U_MEMORY_ALLOCATION_ERROR;
712 goto CLEANUP;
713 }
714
715 *status = U_ZERO_ERROR; // reset error
716
717 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
718
719 }
720
721 }else{
722 b2 = b1;
723 b2Len = b1Len;
724 }
725
726
727 if(U_FAILURE(*status)){
728 goto CLEANUP;
729 }
730
731 UChar32 ch;
732 UStringPrepType type;
733 int16_t value;
734 UBool isIndex;
735
736 // Prohibit and checkBiDi in one pass
737 for(b2Index=0; b2Index<b2Len;){
738
739 ch = 0;
740
741 U16_NEXT(b2, b2Index, b2Len, ch);
742
743 UTRIE_GET16(&profile->sprepTrie,ch,result);
744
745 type = getValues(result, value, isIndex);
746
747 if( type == USPREP_PROHIBITED ||
748 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
749 ){
750 *status = U_STRINGPREP_PROHIBITED_ERROR;
751 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
752 goto CLEANUP;
753 }
754
755 direction = u_charDirection(ch);
756 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
757 firstCharDir = direction;
758 }
759 if(direction == U_LEFT_TO_RIGHT){
760 leftToRight = TRUE;
761 ltrPos = b2Index-1;
762 }
763 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
764 rightToLeft = TRUE;
765 rtlPos = b2Index-1;
766 }
767 }
768 if(profile->checkBiDi == TRUE){
769 // satisfy 2
770 if( leftToRight == TRUE && rightToLeft == TRUE){
771 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
772 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
773 goto CLEANUP;
774 }
775
776 //satisfy 3
777 if( rightToLeft == TRUE &&
778 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
779 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
780 ){
781 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
782 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
783 return FALSE;
784 }
785 }
786 if(b2Len <= destCapacity){
787 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
788 }
789
790 CLEANUP:
791 if(b1!=b1Stack){
792 uprv_free(b1);
793 b1=NULL;
794 }
795
796 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
797 uprv_free(b2);
798 b2=NULL;
799 }
800 return u_terminateUChars(dest, destCapacity, b2Len, status);
801 }
802
803
804 /* data swapping ------------------------------------------------------------ */
805
806 U_CAPI int32_t U_EXPORT2
807 usprep_swap(const UDataSwapper *ds,
808 const void *inData, int32_t length, void *outData,
809 UErrorCode *pErrorCode) {
810 const UDataInfo *pInfo;
811 int32_t headerSize;
812
813 const uint8_t *inBytes;
814 uint8_t *outBytes;
815
816 const int32_t *inIndexes;
817 int32_t indexes[16];
818
819 int32_t i, offset, count, size;
820
821 /* udata_swapDataHeader checks the arguments */
822 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
823 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
824 return 0;
825 }
826
827 /* check data format and format version */
828 pInfo=(const UDataInfo *)((const char *)inData+4);
829 if(!(
830 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
831 pInfo->dataFormat[1]==0x50 &&
832 pInfo->dataFormat[2]==0x52 &&
833 pInfo->dataFormat[3]==0x50 &&
834 pInfo->formatVersion[0]==3
835 )) {
836 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
837 pInfo->dataFormat[0], pInfo->dataFormat[1],
838 pInfo->dataFormat[2], pInfo->dataFormat[3],
839 pInfo->formatVersion[0]);
840 *pErrorCode=U_UNSUPPORTED_ERROR;
841 return 0;
842 }
843
844 inBytes=(const uint8_t *)inData+headerSize;
845 outBytes=(uint8_t *)outData+headerSize;
846
847 inIndexes=(const int32_t *)inBytes;
848
849 if(length>=0) {
850 length-=headerSize;
851 if(length<16*4) {
852 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
853 length);
854 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
855 return 0;
856 }
857 }
858
859 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
860 for(i=0; i<16; ++i) {
861 indexes[i]=udata_readInt32(ds, inIndexes[i]);
862 }
863
864 /* calculate the total length of the data */
865 size=
866 16*4+ /* size of indexes[] */
867 indexes[_SPREP_INDEX_TRIE_SIZE]+
868 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
869
870 if(length>=0) {
871 if(length<size) {
872 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
873 length);
874 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
875 return 0;
876 }
877
878 /* copy the data for inaccessible bytes */
879 if(inBytes!=outBytes) {
880 uprv_memcpy(outBytes, inBytes, size);
881 }
882
883 offset=0;
884
885 /* swap the int32_t indexes[] */
886 count=16*4;
887 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
888 offset+=count;
889
890 /* swap the UTrie */
891 count=indexes[_SPREP_INDEX_TRIE_SIZE];
892 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
893 offset+=count;
894
895 /* swap the uint16_t mappingTable[] */
896 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
897 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
898 offset+=count;
899 }
900
901 return headerSize+size;
902 }
903
904 #endif /* #if !UCONFIG_NO_IDNA */