]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/usprep.cpp
ICU-551.51.4.tar.gz
[apple/icu.git] / icuSources / common / usprep.cpp
CommitLineData
374ca955
A
1/*
2 *******************************************************************************
3 *
b331163b 4 * Copyright (C) 2003-2014, International Business Machines
374ca955
A
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: usprep.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003jul2
14 * created by: Ram Viswanadha
15 */
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_IDNA
20
21#include "unicode/usprep.h"
22
b331163b 23#include "unicode/normalizer2.h"
374ca955
A
24#include "unicode/ustring.h"
25#include "unicode/uchar.h"
26#include "unicode/uversion.h"
27#include "umutex.h"
28#include "cmemory.h"
29#include "sprpimpl.h"
30#include "ustr_imp.h"
31#include "uhash.h"
32#include "cstring.h"
33#include "udataswp.h"
34#include "ucln_cmn.h"
73c04bcf 35#include "ubidi_props.h"
b331163b 36#include "uprops.h"
374ca955 37
729e4ab9
A
38U_NAMESPACE_USE
39
374ca955
A
40U_CDECL_BEGIN
41
42/*
43Static cache for already opened StringPrep profiles
44*/
45static UHashtable *SHARED_DATA_HASHTABLE = NULL;
57a6839d 46static icu::UInitOnce gSharedDataInitOnce;
374ca955 47
51004dcb 48static UMutex usprepMutex = U_MUTEX_INITIALIZER;
374ca955
A
49
50/* format version of spp file */
51004dcb 51//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
374ca955
A
52
53/* the Unicode version of the sprep data */
54static UVersionInfo dataVersion={ 0, 0, 0, 0 };
55
/*
 * Data file names of the built-in profiles.
 * The entries must stay aligned with UStringPrepProfileType, because
 * usprep_openByType() uses the enum value as an index into this table.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
73
374ca955
A
74static UBool U_CALLCONV
75isSPrepAcceptable(void * /* context */,
76 const char * /* type */,
77 const char * /* name */,
78 const UDataInfo *pInfo) {
79 if(
80 pInfo->size>=20 &&
81 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
82 pInfo->charsetFamily==U_CHARSET_FAMILY &&
83 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
84 pInfo->dataFormat[1]==0x50 &&
85 pInfo->dataFormat[2]==0x52 &&
86 pInfo->dataFormat[3]==0x50 &&
87 pInfo->formatVersion[0]==3 &&
88 pInfo->formatVersion[2]==UTRIE_SHIFT &&
89 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
90 ) {
51004dcb 91 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
374ca955
A
92 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
93 return TRUE;
94 } else {
95 return FALSE;
96 }
97}
98
99static int32_t U_CALLCONV
100getSPrepFoldingOffset(uint32_t data) {
101
102 return (int32_t)data;
103
104}
105
106/* hashes an entry */
73c04bcf 107static int32_t U_CALLCONV
374ca955
A
108hashEntry(const UHashTok parm) {
109 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
110 UHashTok namekey, pathkey;
111 namekey.pointer = b->name;
112 pathkey.pointer = b->path;
113 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
114}
115
116/* compares two entries */
73c04bcf 117static UBool U_CALLCONV
374ca955
A
118compareEntries(const UHashTok p1, const UHashTok p2) {
119 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
120 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
121 UHashTok name1, name2, path1, path2;
122 name1.pointer = b1->name;
123 name2.pointer = b2->name;
124 path1.pointer = b1->path;
125 path2.pointer = b2->path;
126 return ((UBool)(uhash_compareChars(name1, name2) &
127 uhash_compareChars(path1, path2)));
128}
129
130static void
131usprep_unload(UStringPrepProfile* data){
132 udata_close(data->sprepData);
133}
134
135static int32_t
136usprep_internal_flushCache(UBool noRefCount){
137 UStringPrepProfile *profile = NULL;
138 UStringPrepKey *key = NULL;
b331163b 139 int32_t pos = UHASH_FIRST;
374ca955
A
140 int32_t deletedNum = 0;
141 const UHashElement *e;
142
143 /*
144 * if shared data hasn't even been lazy evaluated yet
145 * return 0
146 */
147 umtx_lock(&usprepMutex);
148 if (SHARED_DATA_HASHTABLE == NULL) {
149 umtx_unlock(&usprepMutex);
150 return 0;
151 }
152
153 /*creates an enumeration to iterate through every element in the table */
154 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
155 {
156 profile = (UStringPrepProfile *) e->value.pointer;
157 key = (UStringPrepKey *) e->key.pointer;
158
159 if ((noRefCount== FALSE && profile->refCount == 0) ||
160 noRefCount== TRUE) {
161 deletedNum++;
162 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
163
164 /* unload the data */
165 usprep_unload(profile);
166
167 if(key->name != NULL) {
168 uprv_free(key->name);
169 key->name=NULL;
170 }
171 if(key->path != NULL) {
172 uprv_free(key->path);
173 key->path=NULL;
174 }
175 uprv_free(profile);
176 uprv_free(key);
177 }
178
179 }
180 umtx_unlock(&usprepMutex);
181
182 return deletedNum;
183}
184
185/* Works just like ucnv_flushCache()
186static int32_t
187usprep_flushCache(){
188 return usprep_internal_flushCache(FALSE);
189}
190*/
191
192static UBool U_CALLCONV usprep_cleanup(void){
193 if (SHARED_DATA_HASHTABLE != NULL) {
194 usprep_internal_flushCache(TRUE);
195 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
196 uhash_close(SHARED_DATA_HASHTABLE);
197 SHARED_DATA_HASHTABLE = NULL;
198 }
199 }
57a6839d 200 gSharedDataInitOnce.reset();
374ca955
A
201 return (SHARED_DATA_HASHTABLE == NULL);
202}
203U_CDECL_END
204
374ca955
A
205
206/** Initializes the cache for resources */
57a6839d
A
207static void U_CALLCONV
208createCache(UErrorCode &status) {
209 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
210 if (U_FAILURE(status)) {
211 SHARED_DATA_HASHTABLE = NULL;
212 }
213 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
214}
215
374ca955
A
216static void
217initCache(UErrorCode *status) {
57a6839d 218 umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
374ca955
A
219}
220
221static UBool U_CALLCONV
222loadData(UStringPrepProfile* profile,
223 const char* path,
224 const char* name,
225 const char* type,
226 UErrorCode* errorCode) {
227 /* load Unicode SPREP data from file */
228 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
229 UDataMemory *dataMemory;
230 const int32_t *p=NULL;
231 const uint8_t *pb;
232 UVersionInfo normUnicodeVersion;
233 int32_t normUniVer, sprepUniVer, normCorrVer;
234
235 if(errorCode==NULL || U_FAILURE(*errorCode)) {
236 return 0;
237 }
238
239 /* open the data outside the mutex block */
240 //TODO: change the path
241 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
242 if(U_FAILURE(*errorCode)) {
243 return FALSE;
244 }
245
246 p=(const int32_t *)udata_getMemory(dataMemory);
247 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
248 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
249 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
250
251
252 if(U_FAILURE(*errorCode)) {
253 udata_close(dataMemory);
254 return FALSE;
255 }
256
257 /* in the mutex block, set the data for this process */
258 umtx_lock(&usprepMutex);
259 if(profile->sprepData==NULL) {
260 profile->sprepData=dataMemory;
261 dataMemory=NULL;
262 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
263 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
264 } else {
265 p=(const int32_t *)udata_getMemory(profile->sprepData);
266 }
267 umtx_unlock(&usprepMutex);
268 /* initialize some variables */
269 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
270
729e4ab9 271 u_getUnicodeVersion(normUnicodeVersion);
374ca955
A
272 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
273 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
274 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
275 (dataVersion[2] << 8 ) + (dataVersion[3]);
276 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
277
278 if(U_FAILURE(*errorCode)){
279 udata_close(dataMemory);
280 return FALSE;
281 }
282 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
283 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
284 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
285 ){
286 *errorCode = U_INVALID_FORMAT_ERROR;
287 udata_close(dataMemory);
288 return FALSE;
289 }
290 profile->isDataLoaded = TRUE;
291
292 /* if a different thread set it first, then close the extra data */
293 if(dataMemory!=NULL) {
294 udata_close(dataMemory); /* NULL if it was set correctly */
295 }
296
297
298 return profile->isDataLoaded;
299}
300
301static UStringPrepProfile*
302usprep_getProfile(const char* path,
303 const char* name,
304 UErrorCode *status){
305
306 UStringPrepProfile* profile = NULL;
307
308 initCache(status);
309
310 if(U_FAILURE(*status)){
311 return NULL;
312 }
313
314 UStringPrepKey stackKey;
315 /*
316 * const is cast way to save malloc, strcpy and free calls
317 * we use the passed in pointers for fetching the data from the
318 * hash table which is safe
319 */
320 stackKey.name = (char*) name;
321 stackKey.path = (char*) path;
322
323 /* fetch the data from the cache */
73c04bcf 324 umtx_lock(&usprepMutex);
374ca955 325 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
729e4ab9
A
326 if(profile != NULL) {
327 profile->refCount++;
328 }
73c04bcf 329 umtx_unlock(&usprepMutex);
374ca955 330
729e4ab9 331 if(profile == NULL) {
374ca955 332 /* else load the data and put the data in the cache */
729e4ab9
A
333 LocalMemory<UStringPrepProfile> newProfile;
334 if(newProfile.allocateInsteadAndReset() == NULL) {
374ca955 335 *status = U_MEMORY_ALLOCATION_ERROR;
374ca955
A
336 return NULL;
337 }
338
729e4ab9
A
339 /* load the data */
340 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
374ca955
A
341 return NULL;
342 }
343
729e4ab9
A
344 /* get the options */
345 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
346 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
374ca955 347
729e4ab9
A
348 if(newProfile->checkBiDi) {
349 newProfile->bdp = ubidi_getSingleton();
350 }
374ca955 351
729e4ab9
A
352 LocalMemory<UStringPrepKey> key;
353 LocalMemory<char> keyName;
354 LocalMemory<char> keyPath;
355 if( key.allocateInsteadAndReset() == NULL ||
356 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
357 (path != NULL &&
358 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
359 ) {
360 *status = U_MEMORY_ALLOCATION_ERROR;
361 usprep_unload(newProfile.getAlias());
374ca955
A
362 return NULL;
363 }
73c04bcf 364
374ca955 365 umtx_lock(&usprepMutex);
729e4ab9
A
366 // If another thread already inserted the same key/value, refcount and cleanup our thread data
367 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
368 if(profile != NULL) {
369 profile->refCount++;
370 usprep_unload(newProfile.getAlias());
371 }
372 else {
373 /* initialize the key members */
374 key->name = keyName.orphan();
375 uprv_strcpy(key->name, name);
376 if(path != NULL){
377 key->path = keyPath.orphan();
378 uprv_strcpy(key->path, path);
379 }
380 profile = newProfile.orphan();
381
382 /* add the data object to the cache */
383 profile->refCount = 1;
384 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
385 }
374ca955
A
386 umtx_unlock(&usprepMutex);
387 }
374ca955
A
388
389 return profile;
390}
391
392U_CAPI UStringPrepProfile* U_EXPORT2
393usprep_open(const char* path,
394 const char* name,
395 UErrorCode* status){
396
397 if(status == NULL || U_FAILURE(*status)){
398 return NULL;
399 }
374ca955
A
400
401 /* initialize the profile struct members */
73c04bcf 402 return usprep_getProfile(path,name,status);
374ca955
A
403}
404
729e4ab9
A
405U_CAPI UStringPrepProfile* U_EXPORT2
406usprep_openByType(UStringPrepProfileType type,
407 UErrorCode* status) {
408 if(status == NULL || U_FAILURE(*status)){
409 return NULL;
410 }
411 int32_t index = (int32_t)type;
412 if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
413 *status = U_ILLEGAL_ARGUMENT_ERROR;
414 return NULL;
415 }
416 return usprep_open(NULL, PROFILE_NAMES[index], status);
417}
418
374ca955
A
419U_CAPI void U_EXPORT2
420usprep_close(UStringPrepProfile* profile){
421 if(profile==NULL){
422 return;
423 }
424
425 umtx_lock(&usprepMutex);
426 /* decrement the ref count*/
427 if(profile->refCount > 0){
428 profile->refCount--;
429 }
430 umtx_unlock(&usprepMutex);
431
432}
433
434U_CFUNC void
435uprv_syntaxError(const UChar* rules,
436 int32_t pos,
437 int32_t rulesLen,
438 UParseError* parseError){
439 if(parseError == NULL){
440 return;
441 }
442 parseError->offset = pos;
443 parseError->line = 0 ; // we are not using line numbers
444
445 // for pre-context
46f4442e 446 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
374ca955
A
447 int32_t limit = pos;
448
449 u_memcpy(parseError->preContext,rules+start,limit-start);
450 //null terminate the buffer
451 parseError->preContext[limit-start] = 0;
452
453 // for post-context; include error rules[pos]
454 start = pos;
455 limit = start + (U_PARSE_CONTEXT_LEN-1);
456 if (limit > rulesLen) {
457 limit = rulesLen;
458 }
459 if (start < rulesLen) {
460 u_memcpy(parseError->postContext,rules+start,limit-start);
461 }
462 //null terminate the buffer
463 parseError->postContext[limit-start]= 0;
464}
465
466
467static inline UStringPrepType
468getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
469
470 UStringPrepType type;
471 if(trieWord == 0){
472 /*
473 * Initial value stored in the mapping table
474 * just return USPREP_TYPE_LIMIT .. so that
475 * the source codepoint is copied to the destination
476 */
477 type = USPREP_TYPE_LIMIT;
73c04bcf
A
478 isIndex =FALSE;
479 value = 0;
374ca955
A
480 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
481 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
73c04bcf
A
482 isIndex =FALSE;
483 value = 0;
374ca955
A
484 }else{
485 /* get the type */
486 type = USPREP_MAP;
487 /* ascertain if the value is index or delta */
488 if(trieWord & 0x02){
489 isIndex = TRUE;
490 value = trieWord >> 2; //mask off the lower 2 bits and shift
374ca955
A
491 }else{
492 isIndex = FALSE;
493 value = (int16_t)trieWord;
494 value = (value >> 2);
374ca955
A
495 }
496
497 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
498 type = USPREP_DELETE;
499 isIndex =FALSE;
500 value = 0;
501 }
502 }
503 return type;
504}
505
b331163b 506// TODO: change to writing to UnicodeString not UChar *
374ca955
A
507static int32_t
508usprep_map( const UStringPrepProfile* profile,
509 const UChar* src, int32_t srcLength,
510 UChar* dest, int32_t destCapacity,
511 int32_t options,
512 UParseError* parseError,
513 UErrorCode* status ){
514
515 uint16_t result;
516 int32_t destIndex=0;
517 int32_t srcIndex;
518 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
519 UStringPrepType type;
520 int16_t value;
521 UBool isIndex;
522 const int32_t* indexes = profile->indexes;
523
524 // no error checking the caller check for error and arguments
525 // no string length check the caller finds out the string length
526
527 for(srcIndex=0;srcIndex<srcLength;){
528 UChar32 ch;
529
530 U16_NEXT(src,srcIndex,srcLength,ch);
531
532 result=0;
533
534 UTRIE_GET16(&profile->sprepTrie,ch,result);
535
536 type = getValues(result, value, isIndex);
537
538 // check if the source codepoint is unassigned
539 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
540
541 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
542 *status = U_STRINGPREP_UNASSIGNED_ERROR;
543 return 0;
544
545 }else if(type == USPREP_MAP){
546
547 int32_t index, length;
548
549 if(isIndex){
550 index = value;
551 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
552 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
553 length = 1;
554 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
555 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
556 length = 2;
557 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
558 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
559 length = 3;
560 }else{
561 length = profile->mappingData[index++];
562
563 }
564
565 /* copy mapping to destination */
566 for(int32_t i=0; i< length; i++){
567 if(destIndex < destCapacity ){
568 dest[destIndex] = profile->mappingData[index+i];
569 }
570 destIndex++; /* for pre-flighting */
571 }
572 continue;
573 }else{
574 // subtract the delta to arrive at the code point
575 ch -= value;
576 }
577
578 }else if(type==USPREP_DELETE){
579 // just consume the codepoint and contine
580 continue;
581 }
582 //copy the code point into destination
583 if(ch <= 0xFFFF){
584 if(destIndex < destCapacity ){
585 dest[destIndex] = (UChar)ch;
586 }
587 destIndex++;
588 }else{
589 if(destIndex+1 < destCapacity ){
590 dest[destIndex] = U16_LEAD(ch);
591 dest[destIndex+1] = U16_TRAIL(ch);
592 }
593 destIndex +=2;
594 }
595
596 }
597
598 return u_terminateUChars(dest, destCapacity, destIndex, status);
599}
600
b331163b 601/*
374ca955
A
602 1) Map -- For each character in the input, check if it has a mapping
603 and, if so, replace it with its mapping.
604
605 2) Normalize -- Possibly normalize the result of step 1 using Unicode
606 normalization.
607
608 3) Prohibit -- Check for any characters that are not allowed in the
609 output. If any are found, return an error.
610
611 4) Check bidi -- Possibly check for right-to-left characters, and if
612 any are found, make sure that the whole string satisfies the
613 requirements for bidirectional strings. If the string does not
614 satisfy the requirements for bidirectional strings, return an
615 error.
616 [Unicode3.2] defines several bidirectional categories; each character
617 has one bidirectional category assigned to it. For the purposes of
618 the requirements below, an "RandALCat character" is a character that
619 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
620 is a character that has Unicode bidirectional category "L". Note
621
622
623 that there are many characters which fall in neither of the above
624 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
625 this because they have bidirectional category "EN".
626
627 In any profile that specifies bidirectional character handling, all
628 three of the following requirements MUST be met:
629
630 1) The characters in section 5.8 MUST be prohibited.
631
632 2) If a string contains any RandALCat character, the string MUST NOT
633 contain any LCat character.
634
635 3) If a string contains any RandALCat character, a RandALCat
636 character MUST be the first character of the string, and a
637 RandALCat character MUST be the last character of the string.
638*/
374ca955
A
639U_CAPI int32_t U_EXPORT2
640usprep_prepare( const UStringPrepProfile* profile,
641 const UChar* src, int32_t srcLength,
642 UChar* dest, int32_t destCapacity,
643 int32_t options,
644 UParseError* parseError,
645 UErrorCode* status ){
646
647 // check error status
b331163b 648 if(U_FAILURE(*status)){
374ca955
A
649 return 0;
650 }
b331163b 651
374ca955 652 //check arguments
b331163b
A
653 if(profile==NULL ||
654 (src==NULL ? srcLength!=0 : srcLength<-1) ||
655 (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
374ca955
A
656 *status=U_ILLEGAL_ARGUMENT_ERROR;
657 return 0;
658 }
659
374ca955 660 //get the string length
b331163b 661 if(srcLength < 0){
374ca955
A
662 srcLength = u_strlen(src);
663 }
664 // map
b331163b
A
665 UnicodeString s1;
666 UChar *b1 = s1.getBuffer(srcLength);
667 if(b1==NULL){
668 *status = U_MEMORY_ALLOCATION_ERROR;
669 return 0;
670 }
671 int32_t b1Len = usprep_map(profile, src, srcLength,
672 b1, s1.getCapacity(), options, parseError, status);
673 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
374ca955
A
674
675 if(*status == U_BUFFER_OVERFLOW_ERROR){
676 // redo processing of string
677 /* we do not have enough room so grow the buffer*/
b331163b 678 b1 = s1.getBuffer(b1Len);
374ca955
A
679 if(b1==NULL){
680 *status = U_MEMORY_ALLOCATION_ERROR;
b331163b 681 return 0;
374ca955
A
682 }
683
684 *status = U_ZERO_ERROR; // reset error
b331163b
A
685 b1Len = usprep_map(profile, src, srcLength,
686 b1, s1.getCapacity(), options, parseError, status);
687 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
688 }
689 if(U_FAILURE(*status)){
690 return 0;
374ca955
A
691 }
692
693 // normalize
b331163b
A
694 UnicodeString s2;
695 if(profile->doNFKC){
696 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
697 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
698 if(U_FAILURE(*status)){
699 return 0;
374ca955 700 }
b331163b 701 fn2.normalize(s1, s2, *status);
374ca955 702 }else{
b331163b 703 s2.fastCopyFrom(s1);
374ca955 704 }
374ca955 705 if(U_FAILURE(*status)){
b331163b 706 return 0;
374ca955
A
707 }
708
374ca955 709 // Prohibit and checkBiDi in one pass
b331163b
A
710 const UChar *b2 = s2.getBuffer();
711 int32_t b2Len = s2.length();
712 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
713 UBool leftToRight=FALSE, rightToLeft=FALSE;
714 int32_t rtlPos =-1, ltrPos =-1;
374ca955 715
b331163b
A
716 for(int32_t b2Index=0; b2Index<b2Len;){
717 UChar32 ch = 0;
374ca955
A
718 U16_NEXT(b2, b2Index, b2Len, ch);
719
b331163b 720 uint16_t result;
374ca955 721 UTRIE_GET16(&profile->sprepTrie,ch,result);
b331163b
A
722
723 int16_t value;
724 UBool isIndex;
725 UStringPrepType type = getValues(result, value, isIndex);
374ca955
A
726
727 if( type == USPREP_PROHIBITED ||
728 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
729 ){
730 *status = U_STRINGPREP_PROHIBITED_ERROR;
731 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
b331163b 732 return 0;
374ca955
A
733 }
734
73c04bcf
A
735 if(profile->checkBiDi) {
736 direction = ubidi_getClass(profile->bdp, ch);
737 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
738 firstCharDir = direction;
739 }
740 if(direction == U_LEFT_TO_RIGHT){
741 leftToRight = TRUE;
742 ltrPos = b2Index-1;
743 }
744 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
745 rightToLeft = TRUE;
746 rtlPos = b2Index-1;
747 }
374ca955 748 }
b331163b 749 }
374ca955
A
750 if(profile->checkBiDi == TRUE){
751 // satisfy 2
752 if( leftToRight == TRUE && rightToLeft == TRUE){
753 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
754 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
b331163b 755 return 0;
374ca955
A
756 }
757
758 //satisfy 3
759 if( rightToLeft == TRUE &&
760 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
761 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
762 ){
763 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
764 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
765 return FALSE;
766 }
767 }
b331163b 768 return s2.extract(dest, destCapacity, *status);
374ca955
A
769}
770
771
772/* data swapping ------------------------------------------------------------ */
773
774U_CAPI int32_t U_EXPORT2
775usprep_swap(const UDataSwapper *ds,
776 const void *inData, int32_t length, void *outData,
777 UErrorCode *pErrorCode) {
778 const UDataInfo *pInfo;
779 int32_t headerSize;
780
781 const uint8_t *inBytes;
782 uint8_t *outBytes;
783
784 const int32_t *inIndexes;
785 int32_t indexes[16];
786
787 int32_t i, offset, count, size;
788
789 /* udata_swapDataHeader checks the arguments */
790 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
791 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
792 return 0;
793 }
794
795 /* check data format and format version */
796 pInfo=(const UDataInfo *)((const char *)inData+4);
797 if(!(
798 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
799 pInfo->dataFormat[1]==0x50 &&
800 pInfo->dataFormat[2]==0x52 &&
801 pInfo->dataFormat[3]==0x50 &&
802 pInfo->formatVersion[0]==3
803 )) {
804 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
805 pInfo->dataFormat[0], pInfo->dataFormat[1],
806 pInfo->dataFormat[2], pInfo->dataFormat[3],
807 pInfo->formatVersion[0]);
808 *pErrorCode=U_UNSUPPORTED_ERROR;
809 return 0;
810 }
811
812 inBytes=(const uint8_t *)inData+headerSize;
813 outBytes=(uint8_t *)outData+headerSize;
814
815 inIndexes=(const int32_t *)inBytes;
816
817 if(length>=0) {
818 length-=headerSize;
819 if(length<16*4) {
820 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
821 length);
822 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
823 return 0;
824 }
825 }
826
827 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
828 for(i=0; i<16; ++i) {
829 indexes[i]=udata_readInt32(ds, inIndexes[i]);
830 }
831
832 /* calculate the total length of the data */
833 size=
834 16*4+ /* size of indexes[] */
835 indexes[_SPREP_INDEX_TRIE_SIZE]+
836 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
837
838 if(length>=0) {
839 if(length<size) {
840 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
841 length);
842 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
843 return 0;
844 }
845
846 /* copy the data for inaccessible bytes */
847 if(inBytes!=outBytes) {
848 uprv_memcpy(outBytes, inBytes, size);
849 }
850
851 offset=0;
852
853 /* swap the int32_t indexes[] */
854 count=16*4;
855 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
856 offset+=count;
857
858 /* swap the UTrie */
859 count=indexes[_SPREP_INDEX_TRIE_SIZE];
860 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
861 offset+=count;
862
863 /* swap the uint16_t mappingTable[] */
864 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
865 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
b331163b 866 //offset+=count;
374ca955
A
867 }
868
869 return headerSize+size;
870}
871
872#endif /* #if !UCONFIG_NO_IDNA */