]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/usprep.cpp
ICU-8.11.2.tar.gz
[apple/icu.git] / icuSources / common / usprep.cpp
CommitLineData
374ca955
A
1/*
2 *******************************************************************************
3 *
73c04bcf 4 * Copyright (C) 2003-2006, International Business Machines
374ca955
A
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: usprep.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003jul2
14 * created by: Ram Viswanadha
15 */
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_IDNA
20
21#include "unicode/usprep.h"
22
23#include "unicode/unorm.h"
24#include "unicode/ustring.h"
25#include "unicode/uchar.h"
26#include "unicode/uversion.h"
27#include "umutex.h"
28#include "cmemory.h"
29#include "sprpimpl.h"
30#include "ustr_imp.h"
31#include "uhash.h"
32#include "cstring.h"
33#include "udataswp.h"
34#include "ucln_cmn.h"
35#include "unormimp.h"
73c04bcf 36#include "ubidi_props.h"
374ca955
A
37
38U_CDECL_BEGIN
39
40/*
41Static cache for already opened StringPrep profiles
42*/
43static UHashtable *SHARED_DATA_HASHTABLE = NULL;
44
45static UMTX usprepMutex = NULL;
46
47/* format version of spp file */
48static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
49
50/* the Unicode version of the sprep data */
51static UVersionInfo dataVersion={ 0, 0, 0, 0 };
52
53static UBool U_CALLCONV
54isSPrepAcceptable(void * /* context */,
55 const char * /* type */,
56 const char * /* name */,
57 const UDataInfo *pInfo) {
58 if(
59 pInfo->size>=20 &&
60 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
61 pInfo->charsetFamily==U_CHARSET_FAMILY &&
62 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
63 pInfo->dataFormat[1]==0x50 &&
64 pInfo->dataFormat[2]==0x52 &&
65 pInfo->dataFormat[3]==0x50 &&
66 pInfo->formatVersion[0]==3 &&
67 pInfo->formatVersion[2]==UTRIE_SHIFT &&
68 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
69 ) {
70 uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
71 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
72 return TRUE;
73 } else {
74 return FALSE;
75 }
76}
77
78static int32_t U_CALLCONV
79getSPrepFoldingOffset(uint32_t data) {
80
81 return (int32_t)data;
82
83}
84
85/* hashes an entry */
73c04bcf 86static int32_t U_CALLCONV
374ca955
A
87hashEntry(const UHashTok parm) {
88 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
89 UHashTok namekey, pathkey;
90 namekey.pointer = b->name;
91 pathkey.pointer = b->path;
92 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
93}
94
95/* compares two entries */
73c04bcf 96static UBool U_CALLCONV
374ca955
A
97compareEntries(const UHashTok p1, const UHashTok p2) {
98 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
99 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
100 UHashTok name1, name2, path1, path2;
101 name1.pointer = b1->name;
102 name2.pointer = b2->name;
103 path1.pointer = b1->path;
104 path2.pointer = b2->path;
105 return ((UBool)(uhash_compareChars(name1, name2) &
106 uhash_compareChars(path1, path2)));
107}
108
109static void
110usprep_unload(UStringPrepProfile* data){
111 udata_close(data->sprepData);
112}
113
114static int32_t
115usprep_internal_flushCache(UBool noRefCount){
116 UStringPrepProfile *profile = NULL;
117 UStringPrepKey *key = NULL;
118 int32_t pos = -1;
119 int32_t deletedNum = 0;
120 const UHashElement *e;
121
122 /*
123 * if shared data hasn't even been lazy evaluated yet
124 * return 0
125 */
126 umtx_lock(&usprepMutex);
127 if (SHARED_DATA_HASHTABLE == NULL) {
128 umtx_unlock(&usprepMutex);
129 return 0;
130 }
131
132 /*creates an enumeration to iterate through every element in the table */
133 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
134 {
135 profile = (UStringPrepProfile *) e->value.pointer;
136 key = (UStringPrepKey *) e->key.pointer;
137
138 if ((noRefCount== FALSE && profile->refCount == 0) ||
139 noRefCount== TRUE) {
140 deletedNum++;
141 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
142
143 /* unload the data */
144 usprep_unload(profile);
145
146 if(key->name != NULL) {
147 uprv_free(key->name);
148 key->name=NULL;
149 }
150 if(key->path != NULL) {
151 uprv_free(key->path);
152 key->path=NULL;
153 }
154 uprv_free(profile);
155 uprv_free(key);
156 }
157
158 }
159 umtx_unlock(&usprepMutex);
160
161 return deletedNum;
162}
163
164/* Works just like ucnv_flushCache()
165static int32_t
166usprep_flushCache(){
167 return usprep_internal_flushCache(FALSE);
168}
169*/
170
171static UBool U_CALLCONV usprep_cleanup(void){
172 if (SHARED_DATA_HASHTABLE != NULL) {
173 usprep_internal_flushCache(TRUE);
174 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
175 uhash_close(SHARED_DATA_HASHTABLE);
176 SHARED_DATA_HASHTABLE = NULL;
177 }
178 }
179
180 umtx_destroy(&usprepMutex); /* Don't worry about destroying the mutex even */
181 /* if the hash table still exists. The mutex */
182 /* will lazily re-init itself if needed. */
183 return (SHARED_DATA_HASHTABLE == NULL);
184}
185U_CDECL_END
186
187static void
188usprep_init() {
189 umtx_init(&usprepMutex);
190}
191
192/** Initializes the cache for resources */
193static void
194initCache(UErrorCode *status) {
195 UBool makeCache = FALSE;
196 umtx_lock(&usprepMutex);
197 makeCache = (SHARED_DATA_HASHTABLE == NULL);
198 umtx_unlock(&usprepMutex);
199 if(makeCache) {
73c04bcf
A
200 UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status);
201 if (U_SUCCESS(*status)) {
202 umtx_lock(&usprepMutex);
203 if(SHARED_DATA_HASHTABLE == NULL) {
204 SHARED_DATA_HASHTABLE = newCache;
205 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
206 newCache = NULL;
207 }
208 umtx_unlock(&usprepMutex);
209 if(newCache != NULL) {
210 uhash_close(newCache);
211 }
374ca955
A
212 }
213 }
214}
215
216static UBool U_CALLCONV
217loadData(UStringPrepProfile* profile,
218 const char* path,
219 const char* name,
220 const char* type,
221 UErrorCode* errorCode) {
222 /* load Unicode SPREP data from file */
223 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
224 UDataMemory *dataMemory;
225 const int32_t *p=NULL;
226 const uint8_t *pb;
227 UVersionInfo normUnicodeVersion;
228 int32_t normUniVer, sprepUniVer, normCorrVer;
229
230 if(errorCode==NULL || U_FAILURE(*errorCode)) {
231 return 0;
232 }
233
234 /* open the data outside the mutex block */
235 //TODO: change the path
236 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
237 if(U_FAILURE(*errorCode)) {
238 return FALSE;
239 }
240
241 p=(const int32_t *)udata_getMemory(dataMemory);
242 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
243 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
244 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
245
246
247 if(U_FAILURE(*errorCode)) {
248 udata_close(dataMemory);
249 return FALSE;
250 }
251
252 /* in the mutex block, set the data for this process */
253 umtx_lock(&usprepMutex);
254 if(profile->sprepData==NULL) {
255 profile->sprepData=dataMemory;
256 dataMemory=NULL;
257 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
258 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
259 } else {
260 p=(const int32_t *)udata_getMemory(profile->sprepData);
261 }
262 umtx_unlock(&usprepMutex);
263 /* initialize some variables */
264 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
265
266 unorm_getUnicodeVersion(&normUnicodeVersion, errorCode);
267 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
268 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
269 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
270 (dataVersion[2] << 8 ) + (dataVersion[3]);
271 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
272
273 if(U_FAILURE(*errorCode)){
274 udata_close(dataMemory);
275 return FALSE;
276 }
277 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
278 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
279 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
280 ){
281 *errorCode = U_INVALID_FORMAT_ERROR;
282 udata_close(dataMemory);
283 return FALSE;
284 }
285 profile->isDataLoaded = TRUE;
286
287 /* if a different thread set it first, then close the extra data */
288 if(dataMemory!=NULL) {
289 udata_close(dataMemory); /* NULL if it was set correctly */
290 }
291
292
293 return profile->isDataLoaded;
294}
295
296static UStringPrepProfile*
297usprep_getProfile(const char* path,
298 const char* name,
299 UErrorCode *status){
300
301 UStringPrepProfile* profile = NULL;
302
303 initCache(status);
304
305 if(U_FAILURE(*status)){
306 return NULL;
307 }
308
309 UStringPrepKey stackKey;
310 /*
311 * const is cast way to save malloc, strcpy and free calls
312 * we use the passed in pointers for fetching the data from the
313 * hash table which is safe
314 */
315 stackKey.name = (char*) name;
316 stackKey.path = (char*) path;
317
318 /* fetch the data from the cache */
73c04bcf 319 umtx_lock(&usprepMutex);
374ca955 320 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
73c04bcf 321 umtx_unlock(&usprepMutex);
374ca955
A
322
323 if(profile == NULL){
324 UStringPrepKey* key = (UStringPrepKey*) uprv_malloc(sizeof(UStringPrepKey));
325 if(key == NULL){
326 *status = U_MEMORY_ALLOCATION_ERROR;
327 return NULL;
328 }
329 /* else load the data and put the data in the cache */
330 profile = (UStringPrepProfile*) uprv_malloc(sizeof(UStringPrepProfile));
331 if(profile == NULL){
332 *status = U_MEMORY_ALLOCATION_ERROR;
333 uprv_free(key);
334 return NULL;
335 }
336
337 /* initialize the data struct members */
338 uprv_memset(profile->indexes,0,sizeof(profile->indexes));
339 profile->mappingData = NULL;
340 profile->sprepData = NULL;
341 profile->refCount = 0;
342
343 /* initialize the key memebers */
344 key->name = (char*) uprv_malloc(uprv_strlen(name)+1);
345 if(key->name == NULL){
346 *status = U_MEMORY_ALLOCATION_ERROR;
347 uprv_free(key);
348 uprv_free(profile);
349 return NULL;
350 }
351
352 uprv_strcpy(key->name, name);
353
354 key->path=NULL;
355
356 if(path != NULL){
357 key->path = (char*) uprv_malloc(uprv_strlen(path)+1);
358 if(key->path == NULL){
359 *status = U_MEMORY_ALLOCATION_ERROR;
73c04bcf 360 uprv_free(key->name);
374ca955
A
361 uprv_free(key);
362 uprv_free(profile);
363 return NULL;
364 }
365 uprv_strcpy(key->path, path);
366 }
367
368 /* load the data */
369 if(!loadData(profile, path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
73c04bcf
A
370 uprv_free(key->path);
371 uprv_free(key->name);
372 uprv_free(key);
373 uprv_free(profile);
374ca955
A
374 return NULL;
375 }
376
377 /* get the options */
378 profile->doNFKC = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
379 profile->checkBiDi = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
73c04bcf
A
380
381 if(profile->checkBiDi) {
382 profile->bdp = ubidi_getSingleton(status);
383 if(U_FAILURE(*status)) {
384 usprep_unload(profile);
385 uprv_free(key->path);
386 uprv_free(key->name);
387 uprv_free(key);
388 uprv_free(profile);
389 return NULL;
390 }
391 } else {
392 profile->bdp = NULL;
393 }
374ca955
A
394
395 umtx_lock(&usprepMutex);
396 /* add the data object to the cache */
397 uhash_put(SHARED_DATA_HASHTABLE, key, profile, status);
398 umtx_unlock(&usprepMutex);
399 }
400 umtx_lock(&usprepMutex);
401 /* increment the refcount */
402 profile->refCount++;
403 umtx_unlock(&usprepMutex);
404
405 return profile;
406}
407
408U_CAPI UStringPrepProfile* U_EXPORT2
409usprep_open(const char* path,
410 const char* name,
411 UErrorCode* status){
412
413 if(status == NULL || U_FAILURE(*status)){
414 return NULL;
415 }
416 /* initialize the mutex */
417 usprep_init();
418
419 /* initialize the profile struct members */
73c04bcf 420 return usprep_getProfile(path,name,status);
374ca955
A
421}
422
423U_CAPI void U_EXPORT2
424usprep_close(UStringPrepProfile* profile){
425 if(profile==NULL){
426 return;
427 }
428
429 umtx_lock(&usprepMutex);
430 /* decrement the ref count*/
431 if(profile->refCount > 0){
432 profile->refCount--;
433 }
434 umtx_unlock(&usprepMutex);
435
436}
437
438U_CFUNC void
439uprv_syntaxError(const UChar* rules,
440 int32_t pos,
441 int32_t rulesLen,
442 UParseError* parseError){
443 if(parseError == NULL){
444 return;
445 }
446 parseError->offset = pos;
447 parseError->line = 0 ; // we are not using line numbers
448
449 // for pre-context
450 int32_t start = (pos <=U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
451 int32_t limit = pos;
452
453 u_memcpy(parseError->preContext,rules+start,limit-start);
454 //null terminate the buffer
455 parseError->preContext[limit-start] = 0;
456
457 // for post-context; include error rules[pos]
458 start = pos;
459 limit = start + (U_PARSE_CONTEXT_LEN-1);
460 if (limit > rulesLen) {
461 limit = rulesLen;
462 }
463 if (start < rulesLen) {
464 u_memcpy(parseError->postContext,rules+start,limit-start);
465 }
466 //null terminate the buffer
467 parseError->postContext[limit-start]= 0;
468}
469
470
471static inline UStringPrepType
472getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
473
474 UStringPrepType type;
475 if(trieWord == 0){
476 /*
477 * Initial value stored in the mapping table
478 * just return USPREP_TYPE_LIMIT .. so that
479 * the source codepoint is copied to the destination
480 */
481 type = USPREP_TYPE_LIMIT;
73c04bcf
A
482 isIndex =FALSE;
483 value = 0;
374ca955
A
484 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
485 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
73c04bcf
A
486 isIndex =FALSE;
487 value = 0;
374ca955
A
488 }else{
489 /* get the type */
490 type = USPREP_MAP;
491 /* ascertain if the value is index or delta */
492 if(trieWord & 0x02){
493 isIndex = TRUE;
494 value = trieWord >> 2; //mask off the lower 2 bits and shift
374ca955
A
495 }else{
496 isIndex = FALSE;
497 value = (int16_t)trieWord;
498 value = (value >> 2);
374ca955
A
499 }
500
501 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
502 type = USPREP_DELETE;
503 isIndex =FALSE;
504 value = 0;
505 }
506 }
507 return type;
508}
509
510
511
512static int32_t
513usprep_map( const UStringPrepProfile* profile,
514 const UChar* src, int32_t srcLength,
515 UChar* dest, int32_t destCapacity,
516 int32_t options,
517 UParseError* parseError,
518 UErrorCode* status ){
519
520 uint16_t result;
521 int32_t destIndex=0;
522 int32_t srcIndex;
523 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
524 UStringPrepType type;
525 int16_t value;
526 UBool isIndex;
527 const int32_t* indexes = profile->indexes;
528
529 // no error checking the caller check for error and arguments
530 // no string length check the caller finds out the string length
531
532 for(srcIndex=0;srcIndex<srcLength;){
533 UChar32 ch;
534
535 U16_NEXT(src,srcIndex,srcLength,ch);
536
537 result=0;
538
539 UTRIE_GET16(&profile->sprepTrie,ch,result);
540
541 type = getValues(result, value, isIndex);
542
543 // check if the source codepoint is unassigned
544 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
545
546 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
547 *status = U_STRINGPREP_UNASSIGNED_ERROR;
548 return 0;
549
550 }else if(type == USPREP_MAP){
551
552 int32_t index, length;
553
554 if(isIndex){
555 index = value;
556 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
557 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
558 length = 1;
559 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
560 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
561 length = 2;
562 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
563 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
564 length = 3;
565 }else{
566 length = profile->mappingData[index++];
567
568 }
569
570 /* copy mapping to destination */
571 for(int32_t i=0; i< length; i++){
572 if(destIndex < destCapacity ){
573 dest[destIndex] = profile->mappingData[index+i];
574 }
575 destIndex++; /* for pre-flighting */
576 }
577 continue;
578 }else{
579 // subtract the delta to arrive at the code point
580 ch -= value;
581 }
582
583 }else if(type==USPREP_DELETE){
584 // just consume the codepoint and contine
585 continue;
586 }
587 //copy the code point into destination
588 if(ch <= 0xFFFF){
589 if(destIndex < destCapacity ){
590 dest[destIndex] = (UChar)ch;
591 }
592 destIndex++;
593 }else{
594 if(destIndex+1 < destCapacity ){
595 dest[destIndex] = U16_LEAD(ch);
596 dest[destIndex+1] = U16_TRAIL(ch);
597 }
598 destIndex +=2;
599 }
600
601 }
602
603 return u_terminateUChars(dest, destCapacity, destIndex, status);
604}
605
606
607static int32_t
608usprep_normalize( const UChar* src, int32_t srcLength,
609 UChar* dest, int32_t destCapacity,
610 UErrorCode* status ){
611 /*
612 * Option UNORM_BEFORE_PRI_29:
613 *
614 * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
615 * requires strict adherence to Unicode 3.2 normalization,
616 * including buggy composition from before fixing Public Review Issue #29.
617 * Note that this results in some valid but nonsensical text to be
618 * either corrupted or rejected, depending on the text.
619 * See http://www.unicode.org/review/resolved-pri.html#pri29
620 * See unorm.cpp and cnormtst.c
621 */
622 return unorm_normalize(
623 src, srcLength,
624 UNORM_NFKC, UNORM_UNICODE_3_2|UNORM_BEFORE_PRI_29,
625 dest, destCapacity,
626 status);
627}
628
629
630 /*
631 1) Map -- For each character in the input, check if it has a mapping
632 and, if so, replace it with its mapping.
633
634 2) Normalize -- Possibly normalize the result of step 1 using Unicode
635 normalization.
636
637 3) Prohibit -- Check for any characters that are not allowed in the
638 output. If any are found, return an error.
639
640 4) Check bidi -- Possibly check for right-to-left characters, and if
641 any are found, make sure that the whole string satisfies the
642 requirements for bidirectional strings. If the string does not
643 satisfy the requirements for bidirectional strings, return an
644 error.
645 [Unicode3.2] defines several bidirectional categories; each character
646 has one bidirectional category assigned to it. For the purposes of
647 the requirements below, an "RandALCat character" is a character that
648 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
649 is a character that has Unicode bidirectional category "L". Note
650
651
652 that there are many characters which fall in neither of the above
653 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
654 this because they have bidirectional category "EN".
655
656 In any profile that specifies bidirectional character handling, all
657 three of the following requirements MUST be met:
658
659 1) The characters in section 5.8 MUST be prohibited.
660
661 2) If a string contains any RandALCat character, the string MUST NOT
662 contain any LCat character.
663
664 3) If a string contains any RandALCat character, a RandALCat
665 character MUST be the first character of the string, and a
666 RandALCat character MUST be the last character of the string.
667*/
668
669#define MAX_STACK_BUFFER_SIZE 300
670
671
672U_CAPI int32_t U_EXPORT2
673usprep_prepare( const UStringPrepProfile* profile,
674 const UChar* src, int32_t srcLength,
675 UChar* dest, int32_t destCapacity,
676 int32_t options,
677 UParseError* parseError,
678 UErrorCode* status ){
679
680 // check error status
681 if(status == NULL || U_FAILURE(*status)){
682 return 0;
683 }
684
685 //check arguments
686 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
687 *status=U_ILLEGAL_ARGUMENT_ERROR;
688 return 0;
689 }
690
691 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
692 UChar *b1 = b1Stack, *b2 = b2Stack;
693 int32_t b1Len, b2Len=0,
694 b1Capacity = MAX_STACK_BUFFER_SIZE ,
695 b2Capacity = MAX_STACK_BUFFER_SIZE;
696 uint16_t result;
697 int32_t b2Index = 0;
698 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
699 UBool leftToRight=FALSE, rightToLeft=FALSE;
700 int32_t rtlPos =-1, ltrPos =-1;
701
702 //get the string length
703 if(srcLength == -1){
704 srcLength = u_strlen(src);
705 }
706 // map
707 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
708
709 if(*status == U_BUFFER_OVERFLOW_ERROR){
710 // redo processing of string
711 /* we do not have enough room so grow the buffer*/
712 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
713 if(b1==NULL){
714 *status = U_MEMORY_ALLOCATION_ERROR;
715 goto CLEANUP;
716 }
717
718 *status = U_ZERO_ERROR; // reset error
719
720 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
721
722 }
723
724 // normalize
725 if(profile->doNFKC == TRUE){
726 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
727
728 if(*status == U_BUFFER_OVERFLOW_ERROR){
729 // redo processing of string
730 /* we do not have enough room so grow the buffer*/
731 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
732 if(b2==NULL){
733 *status = U_MEMORY_ALLOCATION_ERROR;
734 goto CLEANUP;
735 }
736
737 *status = U_ZERO_ERROR; // reset error
738
739 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
740
741 }
742
743 }else{
744 b2 = b1;
745 b2Len = b1Len;
746 }
747
748
749 if(U_FAILURE(*status)){
750 goto CLEANUP;
751 }
752
753 UChar32 ch;
754 UStringPrepType type;
755 int16_t value;
756 UBool isIndex;
757
758 // Prohibit and checkBiDi in one pass
759 for(b2Index=0; b2Index<b2Len;){
760
761 ch = 0;
762
763 U16_NEXT(b2, b2Index, b2Len, ch);
764
765 UTRIE_GET16(&profile->sprepTrie,ch,result);
766
767 type = getValues(result, value, isIndex);
768
769 if( type == USPREP_PROHIBITED ||
770 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
771 ){
772 *status = U_STRINGPREP_PROHIBITED_ERROR;
773 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
774 goto CLEANUP;
775 }
776
73c04bcf
A
777 if(profile->checkBiDi) {
778 direction = ubidi_getClass(profile->bdp, ch);
779 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
780 firstCharDir = direction;
781 }
782 if(direction == U_LEFT_TO_RIGHT){
783 leftToRight = TRUE;
784 ltrPos = b2Index-1;
785 }
786 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
787 rightToLeft = TRUE;
788 rtlPos = b2Index-1;
789 }
374ca955
A
790 }
791 }
792 if(profile->checkBiDi == TRUE){
793 // satisfy 2
794 if( leftToRight == TRUE && rightToLeft == TRUE){
795 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
796 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
797 goto CLEANUP;
798 }
799
800 //satisfy 3
801 if( rightToLeft == TRUE &&
802 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
803 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
804 ){
805 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
806 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
807 return FALSE;
808 }
809 }
73c04bcf 810 if(b2Len>0 && b2Len <= destCapacity){
374ca955
A
811 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
812 }
813
814CLEANUP:
815 if(b1!=b1Stack){
816 uprv_free(b1);
817 b1=NULL;
818 }
819
820 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
821 uprv_free(b2);
822 b2=NULL;
823 }
824 return u_terminateUChars(dest, destCapacity, b2Len, status);
825}
826
827
828/* data swapping ------------------------------------------------------------ */
829
830U_CAPI int32_t U_EXPORT2
831usprep_swap(const UDataSwapper *ds,
832 const void *inData, int32_t length, void *outData,
833 UErrorCode *pErrorCode) {
834 const UDataInfo *pInfo;
835 int32_t headerSize;
836
837 const uint8_t *inBytes;
838 uint8_t *outBytes;
839
840 const int32_t *inIndexes;
841 int32_t indexes[16];
842
843 int32_t i, offset, count, size;
844
845 /* udata_swapDataHeader checks the arguments */
846 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
847 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
848 return 0;
849 }
850
851 /* check data format and format version */
852 pInfo=(const UDataInfo *)((const char *)inData+4);
853 if(!(
854 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
855 pInfo->dataFormat[1]==0x50 &&
856 pInfo->dataFormat[2]==0x52 &&
857 pInfo->dataFormat[3]==0x50 &&
858 pInfo->formatVersion[0]==3
859 )) {
860 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
861 pInfo->dataFormat[0], pInfo->dataFormat[1],
862 pInfo->dataFormat[2], pInfo->dataFormat[3],
863 pInfo->formatVersion[0]);
864 *pErrorCode=U_UNSUPPORTED_ERROR;
865 return 0;
866 }
867
868 inBytes=(const uint8_t *)inData+headerSize;
869 outBytes=(uint8_t *)outData+headerSize;
870
871 inIndexes=(const int32_t *)inBytes;
872
873 if(length>=0) {
874 length-=headerSize;
875 if(length<16*4) {
876 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
877 length);
878 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
879 return 0;
880 }
881 }
882
883 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
884 for(i=0; i<16; ++i) {
885 indexes[i]=udata_readInt32(ds, inIndexes[i]);
886 }
887
888 /* calculate the total length of the data */
889 size=
890 16*4+ /* size of indexes[] */
891 indexes[_SPREP_INDEX_TRIE_SIZE]+
892 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
893
894 if(length>=0) {
895 if(length<size) {
896 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
897 length);
898 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
899 return 0;
900 }
901
902 /* copy the data for inaccessible bytes */
903 if(inBytes!=outBytes) {
904 uprv_memcpy(outBytes, inBytes, size);
905 }
906
907 offset=0;
908
909 /* swap the int32_t indexes[] */
910 count=16*4;
911 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
912 offset+=count;
913
914 /* swap the UTrie */
915 count=indexes[_SPREP_INDEX_TRIE_SIZE];
916 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
917 offset+=count;
918
919 /* swap the uint16_t mappingTable[] */
920 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
921 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
922 offset+=count;
923 }
924
925 return headerSize+size;
926}
927
928#endif /* #if !UCONFIG_NO_IDNA */