]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/usprep.cpp
ICU-531.48.tar.gz
[apple/icu.git] / icuSources / common / usprep.cpp
CommitLineData
374ca955
A
1/*
2 *******************************************************************************
3 *
57a6839d 4 * Copyright (C) 2003-2013, International Business Machines
374ca955
A
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: usprep.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003jul2
14 * created by: Ram Viswanadha
15 */
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_IDNA
20
21#include "unicode/usprep.h"
22
23#include "unicode/unorm.h"
24#include "unicode/ustring.h"
25#include "unicode/uchar.h"
26#include "unicode/uversion.h"
27#include "umutex.h"
28#include "cmemory.h"
29#include "sprpimpl.h"
30#include "ustr_imp.h"
31#include "uhash.h"
32#include "cstring.h"
33#include "udataswp.h"
34#include "ucln_cmn.h"
73c04bcf 35#include "ubidi_props.h"
374ca955 36
729e4ab9
A
37U_NAMESPACE_USE
38
374ca955
A
39U_CDECL_BEGIN
40
41/*
42Static cache for already opened StringPrep profiles
43*/
44static UHashtable *SHARED_DATA_HASHTABLE = NULL;
57a6839d 45static icu::UInitOnce gSharedDataInitOnce;
374ca955 46
51004dcb 47static UMutex usprepMutex = U_MUTEX_INITIALIZER;
374ca955
A
48
49/* format version of spp file */
51004dcb 50//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
374ca955
A
51
52/* the Unicode version of the sprep data */
53static UVersionInfo dataVersion={ 0, 0, 0, 0 };
54
/*
 * Data file names of the predefined profiles.
 * The order must stay aligned with UStringPrepProfileType, because
 * usprep_openByType() indexes this table with the enum value.
 */
static const char * const PROFILE_NAMES[] = {
    /* USPREP_RFC3491_NAMEPREP */               "rfc3491",
    /* USPREP_RFC3530_NFS4_CS_PREP */           "rfc3530cs",
    /* USPREP_RFC3530_NFS4_CS_PREP_CI */        "rfc3530csci",
    /* USPREP_RFC3530_NFS4_CIS_PREP */          "rfc3491",
    /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */ "rfc3530mixp",
    /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */ "rfc3491",
    /* USPREP_RFC3722_ISCSI */                  "rfc3722",
    /* USPREP_RFC3920_NODEPREP */               "rfc3920node",
    /* USPREP_RFC3920_RESOURCEPREP */           "rfc3920res",
    /* USPREP_RFC4011_MIB */                    "rfc4011",
    /* USPREP_RFC4013_SASLPREP */               "rfc4013",
    /* USPREP_RFC4505_TRACE */                  "rfc4505",
    /* USPREP_RFC4518_LDAP */                   "rfc4518",
    /* USPREP_RFC4518_LDAP_CI */                "rfc4518ci",
};
72
374ca955
A
73static UBool U_CALLCONV
74isSPrepAcceptable(void * /* context */,
75 const char * /* type */,
76 const char * /* name */,
77 const UDataInfo *pInfo) {
78 if(
79 pInfo->size>=20 &&
80 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
81 pInfo->charsetFamily==U_CHARSET_FAMILY &&
82 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
83 pInfo->dataFormat[1]==0x50 &&
84 pInfo->dataFormat[2]==0x52 &&
85 pInfo->dataFormat[3]==0x50 &&
86 pInfo->formatVersion[0]==3 &&
87 pInfo->formatVersion[2]==UTRIE_SHIFT &&
88 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
89 ) {
51004dcb 90 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
374ca955
A
91 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
92 return TRUE;
93 } else {
94 return FALSE;
95 }
96}
97
98static int32_t U_CALLCONV
99getSPrepFoldingOffset(uint32_t data) {
100
101 return (int32_t)data;
102
103}
104
105/* hashes an entry */
73c04bcf 106static int32_t U_CALLCONV
374ca955
A
107hashEntry(const UHashTok parm) {
108 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
109 UHashTok namekey, pathkey;
110 namekey.pointer = b->name;
111 pathkey.pointer = b->path;
112 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
113}
114
115/* compares two entries */
73c04bcf 116static UBool U_CALLCONV
374ca955
A
117compareEntries(const UHashTok p1, const UHashTok p2) {
118 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
119 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
120 UHashTok name1, name2, path1, path2;
121 name1.pointer = b1->name;
122 name2.pointer = b2->name;
123 path1.pointer = b1->path;
124 path2.pointer = b2->path;
125 return ((UBool)(uhash_compareChars(name1, name2) &
126 uhash_compareChars(path1, path2)));
127}
128
129static void
130usprep_unload(UStringPrepProfile* data){
131 udata_close(data->sprepData);
132}
133
134static int32_t
135usprep_internal_flushCache(UBool noRefCount){
136 UStringPrepProfile *profile = NULL;
137 UStringPrepKey *key = NULL;
138 int32_t pos = -1;
139 int32_t deletedNum = 0;
140 const UHashElement *e;
141
142 /*
143 * if shared data hasn't even been lazy evaluated yet
144 * return 0
145 */
146 umtx_lock(&usprepMutex);
147 if (SHARED_DATA_HASHTABLE == NULL) {
148 umtx_unlock(&usprepMutex);
149 return 0;
150 }
151
152 /*creates an enumeration to iterate through every element in the table */
153 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
154 {
155 profile = (UStringPrepProfile *) e->value.pointer;
156 key = (UStringPrepKey *) e->key.pointer;
157
158 if ((noRefCount== FALSE && profile->refCount == 0) ||
159 noRefCount== TRUE) {
160 deletedNum++;
161 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
162
163 /* unload the data */
164 usprep_unload(profile);
165
166 if(key->name != NULL) {
167 uprv_free(key->name);
168 key->name=NULL;
169 }
170 if(key->path != NULL) {
171 uprv_free(key->path);
172 key->path=NULL;
173 }
174 uprv_free(profile);
175 uprv_free(key);
176 }
177
178 }
179 umtx_unlock(&usprepMutex);
180
181 return deletedNum;
182}
183
184/* Works just like ucnv_flushCache()
185static int32_t
186usprep_flushCache(){
187 return usprep_internal_flushCache(FALSE);
188}
189*/
190
191static UBool U_CALLCONV usprep_cleanup(void){
192 if (SHARED_DATA_HASHTABLE != NULL) {
193 usprep_internal_flushCache(TRUE);
194 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
195 uhash_close(SHARED_DATA_HASHTABLE);
196 SHARED_DATA_HASHTABLE = NULL;
197 }
198 }
57a6839d 199 gSharedDataInitOnce.reset();
374ca955
A
200 return (SHARED_DATA_HASHTABLE == NULL);
201}
202U_CDECL_END
203
374ca955
A
204
205/** Initializes the cache for resources */
57a6839d
A
206static void U_CALLCONV
207createCache(UErrorCode &status) {
208 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
209 if (U_FAILURE(status)) {
210 SHARED_DATA_HASHTABLE = NULL;
211 }
212 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
213}
214
374ca955
A
215static void
216initCache(UErrorCode *status) {
57a6839d 217 umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
374ca955
A
218}
219
220static UBool U_CALLCONV
221loadData(UStringPrepProfile* profile,
222 const char* path,
223 const char* name,
224 const char* type,
225 UErrorCode* errorCode) {
226 /* load Unicode SPREP data from file */
227 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
228 UDataMemory *dataMemory;
229 const int32_t *p=NULL;
230 const uint8_t *pb;
231 UVersionInfo normUnicodeVersion;
232 int32_t normUniVer, sprepUniVer, normCorrVer;
233
234 if(errorCode==NULL || U_FAILURE(*errorCode)) {
235 return 0;
236 }
237
238 /* open the data outside the mutex block */
239 //TODO: change the path
240 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
241 if(U_FAILURE(*errorCode)) {
242 return FALSE;
243 }
244
245 p=(const int32_t *)udata_getMemory(dataMemory);
246 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
247 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
248 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
249
250
251 if(U_FAILURE(*errorCode)) {
252 udata_close(dataMemory);
253 return FALSE;
254 }
255
256 /* in the mutex block, set the data for this process */
257 umtx_lock(&usprepMutex);
258 if(profile->sprepData==NULL) {
259 profile->sprepData=dataMemory;
260 dataMemory=NULL;
261 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
262 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
263 } else {
264 p=(const int32_t *)udata_getMemory(profile->sprepData);
265 }
266 umtx_unlock(&usprepMutex);
267 /* initialize some variables */
268 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
269
729e4ab9 270 u_getUnicodeVersion(normUnicodeVersion);
374ca955
A
271 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
272 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
273 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
274 (dataVersion[2] << 8 ) + (dataVersion[3]);
275 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
276
277 if(U_FAILURE(*errorCode)){
278 udata_close(dataMemory);
279 return FALSE;
280 }
281 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
282 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
283 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
284 ){
285 *errorCode = U_INVALID_FORMAT_ERROR;
286 udata_close(dataMemory);
287 return FALSE;
288 }
289 profile->isDataLoaded = TRUE;
290
291 /* if a different thread set it first, then close the extra data */
292 if(dataMemory!=NULL) {
293 udata_close(dataMemory); /* NULL if it was set correctly */
294 }
295
296
297 return profile->isDataLoaded;
298}
299
300static UStringPrepProfile*
301usprep_getProfile(const char* path,
302 const char* name,
303 UErrorCode *status){
304
305 UStringPrepProfile* profile = NULL;
306
307 initCache(status);
308
309 if(U_FAILURE(*status)){
310 return NULL;
311 }
312
313 UStringPrepKey stackKey;
314 /*
315 * const is cast way to save malloc, strcpy and free calls
316 * we use the passed in pointers for fetching the data from the
317 * hash table which is safe
318 */
319 stackKey.name = (char*) name;
320 stackKey.path = (char*) path;
321
322 /* fetch the data from the cache */
73c04bcf 323 umtx_lock(&usprepMutex);
374ca955 324 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
729e4ab9
A
325 if(profile != NULL) {
326 profile->refCount++;
327 }
73c04bcf 328 umtx_unlock(&usprepMutex);
374ca955 329
729e4ab9 330 if(profile == NULL) {
374ca955 331 /* else load the data and put the data in the cache */
729e4ab9
A
332 LocalMemory<UStringPrepProfile> newProfile;
333 if(newProfile.allocateInsteadAndReset() == NULL) {
374ca955 334 *status = U_MEMORY_ALLOCATION_ERROR;
374ca955
A
335 return NULL;
336 }
337
729e4ab9
A
338 /* load the data */
339 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
374ca955
A
340 return NULL;
341 }
342
729e4ab9
A
343 /* get the options */
344 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
345 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
374ca955 346
729e4ab9
A
347 if(newProfile->checkBiDi) {
348 newProfile->bdp = ubidi_getSingleton();
349 }
374ca955 350
729e4ab9
A
351 LocalMemory<UStringPrepKey> key;
352 LocalMemory<char> keyName;
353 LocalMemory<char> keyPath;
354 if( key.allocateInsteadAndReset() == NULL ||
355 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
356 (path != NULL &&
357 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
358 ) {
359 *status = U_MEMORY_ALLOCATION_ERROR;
360 usprep_unload(newProfile.getAlias());
374ca955
A
361 return NULL;
362 }
73c04bcf 363
374ca955 364 umtx_lock(&usprepMutex);
729e4ab9
A
365 // If another thread already inserted the same key/value, refcount and cleanup our thread data
366 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
367 if(profile != NULL) {
368 profile->refCount++;
369 usprep_unload(newProfile.getAlias());
370 }
371 else {
372 /* initialize the key members */
373 key->name = keyName.orphan();
374 uprv_strcpy(key->name, name);
375 if(path != NULL){
376 key->path = keyPath.orphan();
377 uprv_strcpy(key->path, path);
378 }
379 profile = newProfile.orphan();
380
381 /* add the data object to the cache */
382 profile->refCount = 1;
383 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
384 }
374ca955
A
385 umtx_unlock(&usprepMutex);
386 }
374ca955
A
387
388 return profile;
389}
390
391U_CAPI UStringPrepProfile* U_EXPORT2
392usprep_open(const char* path,
393 const char* name,
394 UErrorCode* status){
395
396 if(status == NULL || U_FAILURE(*status)){
397 return NULL;
398 }
374ca955
A
399
400 /* initialize the profile struct members */
73c04bcf 401 return usprep_getProfile(path,name,status);
374ca955
A
402}
403
729e4ab9
A
404U_CAPI UStringPrepProfile* U_EXPORT2
405usprep_openByType(UStringPrepProfileType type,
406 UErrorCode* status) {
407 if(status == NULL || U_FAILURE(*status)){
408 return NULL;
409 }
410 int32_t index = (int32_t)type;
411 if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
412 *status = U_ILLEGAL_ARGUMENT_ERROR;
413 return NULL;
414 }
415 return usprep_open(NULL, PROFILE_NAMES[index], status);
416}
417
374ca955
A
418U_CAPI void U_EXPORT2
419usprep_close(UStringPrepProfile* profile){
420 if(profile==NULL){
421 return;
422 }
423
424 umtx_lock(&usprepMutex);
425 /* decrement the ref count*/
426 if(profile->refCount > 0){
427 profile->refCount--;
428 }
429 umtx_unlock(&usprepMutex);
430
431}
432
433U_CFUNC void
434uprv_syntaxError(const UChar* rules,
435 int32_t pos,
436 int32_t rulesLen,
437 UParseError* parseError){
438 if(parseError == NULL){
439 return;
440 }
441 parseError->offset = pos;
442 parseError->line = 0 ; // we are not using line numbers
443
444 // for pre-context
46f4442e 445 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
374ca955
A
446 int32_t limit = pos;
447
448 u_memcpy(parseError->preContext,rules+start,limit-start);
449 //null terminate the buffer
450 parseError->preContext[limit-start] = 0;
451
452 // for post-context; include error rules[pos]
453 start = pos;
454 limit = start + (U_PARSE_CONTEXT_LEN-1);
455 if (limit > rulesLen) {
456 limit = rulesLen;
457 }
458 if (start < rulesLen) {
459 u_memcpy(parseError->postContext,rules+start,limit-start);
460 }
461 //null terminate the buffer
462 parseError->postContext[limit-start]= 0;
463}
464
465
466static inline UStringPrepType
467getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
468
469 UStringPrepType type;
470 if(trieWord == 0){
471 /*
472 * Initial value stored in the mapping table
473 * just return USPREP_TYPE_LIMIT .. so that
474 * the source codepoint is copied to the destination
475 */
476 type = USPREP_TYPE_LIMIT;
73c04bcf
A
477 isIndex =FALSE;
478 value = 0;
374ca955
A
479 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
480 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
73c04bcf
A
481 isIndex =FALSE;
482 value = 0;
374ca955
A
483 }else{
484 /* get the type */
485 type = USPREP_MAP;
486 /* ascertain if the value is index or delta */
487 if(trieWord & 0x02){
488 isIndex = TRUE;
489 value = trieWord >> 2; //mask off the lower 2 bits and shift
374ca955
A
490 }else{
491 isIndex = FALSE;
492 value = (int16_t)trieWord;
493 value = (value >> 2);
374ca955
A
494 }
495
496 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
497 type = USPREP_DELETE;
498 isIndex =FALSE;
499 value = 0;
500 }
501 }
502 return type;
503}
504
505
506
507static int32_t
508usprep_map( const UStringPrepProfile* profile,
509 const UChar* src, int32_t srcLength,
510 UChar* dest, int32_t destCapacity,
511 int32_t options,
512 UParseError* parseError,
513 UErrorCode* status ){
514
515 uint16_t result;
516 int32_t destIndex=0;
517 int32_t srcIndex;
518 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
519 UStringPrepType type;
520 int16_t value;
521 UBool isIndex;
522 const int32_t* indexes = profile->indexes;
523
524 // no error checking the caller check for error and arguments
525 // no string length check the caller finds out the string length
526
527 for(srcIndex=0;srcIndex<srcLength;){
528 UChar32 ch;
529
530 U16_NEXT(src,srcIndex,srcLength,ch);
531
532 result=0;
533
534 UTRIE_GET16(&profile->sprepTrie,ch,result);
535
536 type = getValues(result, value, isIndex);
537
538 // check if the source codepoint is unassigned
539 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
540
541 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
542 *status = U_STRINGPREP_UNASSIGNED_ERROR;
543 return 0;
544
545 }else if(type == USPREP_MAP){
546
547 int32_t index, length;
548
549 if(isIndex){
550 index = value;
551 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
552 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
553 length = 1;
554 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
555 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
556 length = 2;
557 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
558 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
559 length = 3;
560 }else{
561 length = profile->mappingData[index++];
562
563 }
564
565 /* copy mapping to destination */
566 for(int32_t i=0; i< length; i++){
567 if(destIndex < destCapacity ){
568 dest[destIndex] = profile->mappingData[index+i];
569 }
570 destIndex++; /* for pre-flighting */
571 }
572 continue;
573 }else{
574 // subtract the delta to arrive at the code point
575 ch -= value;
576 }
577
578 }else if(type==USPREP_DELETE){
579 // just consume the codepoint and contine
580 continue;
581 }
582 //copy the code point into destination
583 if(ch <= 0xFFFF){
584 if(destIndex < destCapacity ){
585 dest[destIndex] = (UChar)ch;
586 }
587 destIndex++;
588 }else{
589 if(destIndex+1 < destCapacity ){
590 dest[destIndex] = U16_LEAD(ch);
591 dest[destIndex+1] = U16_TRAIL(ch);
592 }
593 destIndex +=2;
594 }
595
596 }
597
598 return u_terminateUChars(dest, destCapacity, destIndex, status);
599}
600
601
602static int32_t
603usprep_normalize( const UChar* src, int32_t srcLength,
604 UChar* dest, int32_t destCapacity,
605 UErrorCode* status ){
374ca955
A
606 return unorm_normalize(
607 src, srcLength,
729e4ab9 608 UNORM_NFKC, UNORM_UNICODE_3_2,
374ca955
A
609 dest, destCapacity,
610 status);
611}
612
613
614 /*
615 1) Map -- For each character in the input, check if it has a mapping
616 and, if so, replace it with its mapping.
617
618 2) Normalize -- Possibly normalize the result of step 1 using Unicode
619 normalization.
620
621 3) Prohibit -- Check for any characters that are not allowed in the
622 output. If any are found, return an error.
623
624 4) Check bidi -- Possibly check for right-to-left characters, and if
625 any are found, make sure that the whole string satisfies the
626 requirements for bidirectional strings. If the string does not
627 satisfy the requirements for bidirectional strings, return an
628 error.
629 [Unicode3.2] defines several bidirectional categories; each character
630 has one bidirectional category assigned to it. For the purposes of
631 the requirements below, an "RandALCat character" is a character that
632 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
633 is a character that has Unicode bidirectional category "L". Note
634
635
636 that there are many characters which fall in neither of the above
637 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
638 this because they have bidirectional category "EN".
639
640 In any profile that specifies bidirectional character handling, all
641 three of the following requirements MUST be met:
642
643 1) The characters in section 5.8 MUST be prohibited.
644
645 2) If a string contains any RandALCat character, the string MUST NOT
646 contain any LCat character.
647
648 3) If a string contains any RandALCat character, a RandALCat
649 character MUST be the first character of the string, and a
650 RandALCat character MUST be the last character of the string.
651*/
652
653#define MAX_STACK_BUFFER_SIZE 300
654
655
656U_CAPI int32_t U_EXPORT2
657usprep_prepare( const UStringPrepProfile* profile,
658 const UChar* src, int32_t srcLength,
659 UChar* dest, int32_t destCapacity,
660 int32_t options,
661 UParseError* parseError,
662 UErrorCode* status ){
663
664 // check error status
665 if(status == NULL || U_FAILURE(*status)){
666 return 0;
667 }
668
669 //check arguments
670 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
671 *status=U_ILLEGAL_ARGUMENT_ERROR;
672 return 0;
673 }
674
675 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
676 UChar *b1 = b1Stack, *b2 = b2Stack;
677 int32_t b1Len, b2Len=0,
678 b1Capacity = MAX_STACK_BUFFER_SIZE ,
679 b2Capacity = MAX_STACK_BUFFER_SIZE;
680 uint16_t result;
681 int32_t b2Index = 0;
682 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
683 UBool leftToRight=FALSE, rightToLeft=FALSE;
684 int32_t rtlPos =-1, ltrPos =-1;
685
686 //get the string length
687 if(srcLength == -1){
688 srcLength = u_strlen(src);
689 }
690 // map
691 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
692
693 if(*status == U_BUFFER_OVERFLOW_ERROR){
694 // redo processing of string
695 /* we do not have enough room so grow the buffer*/
696 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
697 if(b1==NULL){
698 *status = U_MEMORY_ALLOCATION_ERROR;
699 goto CLEANUP;
700 }
701
702 *status = U_ZERO_ERROR; // reset error
703
704 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
705
706 }
707
708 // normalize
709 if(profile->doNFKC == TRUE){
710 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
711
712 if(*status == U_BUFFER_OVERFLOW_ERROR){
713 // redo processing of string
714 /* we do not have enough room so grow the buffer*/
715 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
716 if(b2==NULL){
717 *status = U_MEMORY_ALLOCATION_ERROR;
718 goto CLEANUP;
719 }
720
721 *status = U_ZERO_ERROR; // reset error
722
723 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
724
725 }
726
727 }else{
728 b2 = b1;
729 b2Len = b1Len;
730 }
731
732
733 if(U_FAILURE(*status)){
734 goto CLEANUP;
735 }
736
737 UChar32 ch;
738 UStringPrepType type;
739 int16_t value;
740 UBool isIndex;
741
742 // Prohibit and checkBiDi in one pass
743 for(b2Index=0; b2Index<b2Len;){
744
745 ch = 0;
746
747 U16_NEXT(b2, b2Index, b2Len, ch);
748
749 UTRIE_GET16(&profile->sprepTrie,ch,result);
750
751 type = getValues(result, value, isIndex);
752
753 if( type == USPREP_PROHIBITED ||
754 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
755 ){
756 *status = U_STRINGPREP_PROHIBITED_ERROR;
757 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
758 goto CLEANUP;
759 }
760
73c04bcf
A
761 if(profile->checkBiDi) {
762 direction = ubidi_getClass(profile->bdp, ch);
763 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
764 firstCharDir = direction;
765 }
766 if(direction == U_LEFT_TO_RIGHT){
767 leftToRight = TRUE;
768 ltrPos = b2Index-1;
769 }
770 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
771 rightToLeft = TRUE;
772 rtlPos = b2Index-1;
773 }
374ca955
A
774 }
775 }
776 if(profile->checkBiDi == TRUE){
777 // satisfy 2
778 if( leftToRight == TRUE && rightToLeft == TRUE){
779 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
780 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
781 goto CLEANUP;
782 }
783
784 //satisfy 3
785 if( rightToLeft == TRUE &&
786 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
787 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
788 ){
789 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
790 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
791 return FALSE;
792 }
793 }
73c04bcf 794 if(b2Len>0 && b2Len <= destCapacity){
374ca955
A
795 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
796 }
797
798CLEANUP:
799 if(b1!=b1Stack){
800 uprv_free(b1);
801 b1=NULL;
802 }
803
804 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
805 uprv_free(b2);
806 b2=NULL;
807 }
808 return u_terminateUChars(dest, destCapacity, b2Len, status);
809}
810
811
812/* data swapping ------------------------------------------------------------ */
813
814U_CAPI int32_t U_EXPORT2
815usprep_swap(const UDataSwapper *ds,
816 const void *inData, int32_t length, void *outData,
817 UErrorCode *pErrorCode) {
818 const UDataInfo *pInfo;
819 int32_t headerSize;
820
821 const uint8_t *inBytes;
822 uint8_t *outBytes;
823
824 const int32_t *inIndexes;
825 int32_t indexes[16];
826
827 int32_t i, offset, count, size;
828
829 /* udata_swapDataHeader checks the arguments */
830 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
831 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
832 return 0;
833 }
834
835 /* check data format and format version */
836 pInfo=(const UDataInfo *)((const char *)inData+4);
837 if(!(
838 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
839 pInfo->dataFormat[1]==0x50 &&
840 pInfo->dataFormat[2]==0x52 &&
841 pInfo->dataFormat[3]==0x50 &&
842 pInfo->formatVersion[0]==3
843 )) {
844 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
845 pInfo->dataFormat[0], pInfo->dataFormat[1],
846 pInfo->dataFormat[2], pInfo->dataFormat[3],
847 pInfo->formatVersion[0]);
848 *pErrorCode=U_UNSUPPORTED_ERROR;
849 return 0;
850 }
851
852 inBytes=(const uint8_t *)inData+headerSize;
853 outBytes=(uint8_t *)outData+headerSize;
854
855 inIndexes=(const int32_t *)inBytes;
856
857 if(length>=0) {
858 length-=headerSize;
859 if(length<16*4) {
860 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
861 length);
862 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
863 return 0;
864 }
865 }
866
867 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
868 for(i=0; i<16; ++i) {
869 indexes[i]=udata_readInt32(ds, inIndexes[i]);
870 }
871
872 /* calculate the total length of the data */
873 size=
874 16*4+ /* size of indexes[] */
875 indexes[_SPREP_INDEX_TRIE_SIZE]+
876 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
877
878 if(length>=0) {
879 if(length<size) {
880 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
881 length);
882 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
883 return 0;
884 }
885
886 /* copy the data for inaccessible bytes */
887 if(inBytes!=outBytes) {
888 uprv_memcpy(outBytes, inBytes, size);
889 }
890
891 offset=0;
892
893 /* swap the int32_t indexes[] */
894 count=16*4;
895 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
896 offset+=count;
897
898 /* swap the UTrie */
899 count=indexes[_SPREP_INDEX_TRIE_SIZE];
900 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
901 offset+=count;
902
903 /* swap the uint16_t mappingTable[] */
904 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
905 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
906 offset+=count;
907 }
908
909 return headerSize+size;
910}
911
912#endif /* #if !UCONFIG_NO_IDNA */