]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/usprep.cpp
ICU-511.35.tar.gz
[apple/icu.git] / icuSources / common / usprep.cpp
CommitLineData
374ca955
A
1/*
2 *******************************************************************************
3 *
51004dcb 4 * Copyright (C) 2003-2012, International Business Machines
374ca955
A
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: usprep.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003jul2
14 * created by: Ram Viswanadha
15 */
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_IDNA
20
21#include "unicode/usprep.h"
22
23#include "unicode/unorm.h"
24#include "unicode/ustring.h"
25#include "unicode/uchar.h"
26#include "unicode/uversion.h"
27#include "umutex.h"
28#include "cmemory.h"
29#include "sprpimpl.h"
30#include "ustr_imp.h"
31#include "uhash.h"
32#include "cstring.h"
33#include "udataswp.h"
34#include "ucln_cmn.h"
73c04bcf 35#include "ubidi_props.h"
374ca955 36
729e4ab9
A
37U_NAMESPACE_USE
38
374ca955
A
39U_CDECL_BEGIN
40
41/*
42Static cache for already opened StringPrep profiles
43*/
44static UHashtable *SHARED_DATA_HASHTABLE = NULL;
45
51004dcb 46static UMutex usprepMutex = U_MUTEX_INITIALIZER;
374ca955
A
47
48/* format version of spp file */
51004dcb 49//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
374ca955
A
50
51/* the Unicode version of the sprep data */
52static UVersionInfo dataVersion={ 0, 0, 0, 0 };
53
729e4ab9 54/* Profile names must be aligned to UStringPrepProfileType */
51004dcb 55static const char * const PROFILE_NAMES[] = {
729e4ab9
A
56 "rfc3491", /* USPREP_RFC3491_NAMEPREP */
57 "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */
58 "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */
59 "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */
60 "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
61 "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
62 "rfc3722", /* USPREP_RFC3722_ISCSI */
63 "rfc3920node", /* USPREP_RFC3920_NODEPREP */
64 "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */
65 "rfc4011", /* USPREP_RFC4011_MIB */
66 "rfc4013", /* USPREP_RFC4013_SASLPREP */
67 "rfc4505", /* USPREP_RFC4505_TRACE */
68 "rfc4518", /* USPREP_RFC4518_LDAP */
69 "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */
70};
71
374ca955
A
72static UBool U_CALLCONV
73isSPrepAcceptable(void * /* context */,
74 const char * /* type */,
75 const char * /* name */,
76 const UDataInfo *pInfo) {
77 if(
78 pInfo->size>=20 &&
79 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
80 pInfo->charsetFamily==U_CHARSET_FAMILY &&
81 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
82 pInfo->dataFormat[1]==0x50 &&
83 pInfo->dataFormat[2]==0x52 &&
84 pInfo->dataFormat[3]==0x50 &&
85 pInfo->formatVersion[0]==3 &&
86 pInfo->formatVersion[2]==UTRIE_SHIFT &&
87 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
88 ) {
51004dcb 89 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
374ca955
A
90 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
91 return TRUE;
92 } else {
93 return FALSE;
94 }
95}
96
97static int32_t U_CALLCONV
98getSPrepFoldingOffset(uint32_t data) {
99
100 return (int32_t)data;
101
102}
103
104/* hashes an entry */
73c04bcf 105static int32_t U_CALLCONV
374ca955
A
106hashEntry(const UHashTok parm) {
107 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
108 UHashTok namekey, pathkey;
109 namekey.pointer = b->name;
110 pathkey.pointer = b->path;
111 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
112}
113
114/* compares two entries */
73c04bcf 115static UBool U_CALLCONV
374ca955
A
116compareEntries(const UHashTok p1, const UHashTok p2) {
117 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
118 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
119 UHashTok name1, name2, path1, path2;
120 name1.pointer = b1->name;
121 name2.pointer = b2->name;
122 path1.pointer = b1->path;
123 path2.pointer = b2->path;
124 return ((UBool)(uhash_compareChars(name1, name2) &
125 uhash_compareChars(path1, path2)));
126}
127
128static void
129usprep_unload(UStringPrepProfile* data){
130 udata_close(data->sprepData);
131}
132
133static int32_t
134usprep_internal_flushCache(UBool noRefCount){
135 UStringPrepProfile *profile = NULL;
136 UStringPrepKey *key = NULL;
137 int32_t pos = -1;
138 int32_t deletedNum = 0;
139 const UHashElement *e;
140
141 /*
142 * if shared data hasn't even been lazy evaluated yet
143 * return 0
144 */
145 umtx_lock(&usprepMutex);
146 if (SHARED_DATA_HASHTABLE == NULL) {
147 umtx_unlock(&usprepMutex);
148 return 0;
149 }
150
151 /*creates an enumeration to iterate through every element in the table */
152 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
153 {
154 profile = (UStringPrepProfile *) e->value.pointer;
155 key = (UStringPrepKey *) e->key.pointer;
156
157 if ((noRefCount== FALSE && profile->refCount == 0) ||
158 noRefCount== TRUE) {
159 deletedNum++;
160 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
161
162 /* unload the data */
163 usprep_unload(profile);
164
165 if(key->name != NULL) {
166 uprv_free(key->name);
167 key->name=NULL;
168 }
169 if(key->path != NULL) {
170 uprv_free(key->path);
171 key->path=NULL;
172 }
173 uprv_free(profile);
174 uprv_free(key);
175 }
176
177 }
178 umtx_unlock(&usprepMutex);
179
180 return deletedNum;
181}
182
183/* Works just like ucnv_flushCache()
184static int32_t
185usprep_flushCache(){
186 return usprep_internal_flushCache(FALSE);
187}
188*/
189
190static UBool U_CALLCONV usprep_cleanup(void){
191 if (SHARED_DATA_HASHTABLE != NULL) {
192 usprep_internal_flushCache(TRUE);
193 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
194 uhash_close(SHARED_DATA_HASHTABLE);
195 SHARED_DATA_HASHTABLE = NULL;
196 }
197 }
198
374ca955
A
199 return (SHARED_DATA_HASHTABLE == NULL);
200}
201U_CDECL_END
202
374ca955
A
203
204/** Initializes the cache for resources */
205static void
206initCache(UErrorCode *status) {
46f4442e
A
207 UBool makeCache;
208 UMTX_CHECK(&usprepMutex, (SHARED_DATA_HASHTABLE == NULL), makeCache);
374ca955 209 if(makeCache) {
73c04bcf
A
210 UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status);
211 if (U_SUCCESS(*status)) {
212 umtx_lock(&usprepMutex);
213 if(SHARED_DATA_HASHTABLE == NULL) {
214 SHARED_DATA_HASHTABLE = newCache;
215 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
216 newCache = NULL;
217 }
218 umtx_unlock(&usprepMutex);
46f4442e
A
219 }
220 if(newCache != NULL) {
221 uhash_close(newCache);
374ca955
A
222 }
223 }
224}
225
226static UBool U_CALLCONV
227loadData(UStringPrepProfile* profile,
228 const char* path,
229 const char* name,
230 const char* type,
231 UErrorCode* errorCode) {
232 /* load Unicode SPREP data from file */
233 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
234 UDataMemory *dataMemory;
235 const int32_t *p=NULL;
236 const uint8_t *pb;
237 UVersionInfo normUnicodeVersion;
238 int32_t normUniVer, sprepUniVer, normCorrVer;
239
240 if(errorCode==NULL || U_FAILURE(*errorCode)) {
241 return 0;
242 }
243
244 /* open the data outside the mutex block */
245 //TODO: change the path
246 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
247 if(U_FAILURE(*errorCode)) {
248 return FALSE;
249 }
250
251 p=(const int32_t *)udata_getMemory(dataMemory);
252 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
253 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
254 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
255
256
257 if(U_FAILURE(*errorCode)) {
258 udata_close(dataMemory);
259 return FALSE;
260 }
261
262 /* in the mutex block, set the data for this process */
263 umtx_lock(&usprepMutex);
264 if(profile->sprepData==NULL) {
265 profile->sprepData=dataMemory;
266 dataMemory=NULL;
267 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
268 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
269 } else {
270 p=(const int32_t *)udata_getMemory(profile->sprepData);
271 }
272 umtx_unlock(&usprepMutex);
273 /* initialize some variables */
274 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
275
729e4ab9 276 u_getUnicodeVersion(normUnicodeVersion);
374ca955
A
277 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
278 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
279 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
280 (dataVersion[2] << 8 ) + (dataVersion[3]);
281 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
282
283 if(U_FAILURE(*errorCode)){
284 udata_close(dataMemory);
285 return FALSE;
286 }
287 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
288 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
289 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
290 ){
291 *errorCode = U_INVALID_FORMAT_ERROR;
292 udata_close(dataMemory);
293 return FALSE;
294 }
295 profile->isDataLoaded = TRUE;
296
297 /* if a different thread set it first, then close the extra data */
298 if(dataMemory!=NULL) {
299 udata_close(dataMemory); /* NULL if it was set correctly */
300 }
301
302
303 return profile->isDataLoaded;
304}
305
306static UStringPrepProfile*
307usprep_getProfile(const char* path,
308 const char* name,
309 UErrorCode *status){
310
311 UStringPrepProfile* profile = NULL;
312
313 initCache(status);
314
315 if(U_FAILURE(*status)){
316 return NULL;
317 }
318
319 UStringPrepKey stackKey;
320 /*
321 * const is cast way to save malloc, strcpy and free calls
322 * we use the passed in pointers for fetching the data from the
323 * hash table which is safe
324 */
325 stackKey.name = (char*) name;
326 stackKey.path = (char*) path;
327
328 /* fetch the data from the cache */
73c04bcf 329 umtx_lock(&usprepMutex);
374ca955 330 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
729e4ab9
A
331 if(profile != NULL) {
332 profile->refCount++;
333 }
73c04bcf 334 umtx_unlock(&usprepMutex);
374ca955 335
729e4ab9 336 if(profile == NULL) {
374ca955 337 /* else load the data and put the data in the cache */
729e4ab9
A
338 LocalMemory<UStringPrepProfile> newProfile;
339 if(newProfile.allocateInsteadAndReset() == NULL) {
374ca955 340 *status = U_MEMORY_ALLOCATION_ERROR;
374ca955
A
341 return NULL;
342 }
343
729e4ab9
A
344 /* load the data */
345 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
374ca955
A
346 return NULL;
347 }
348
729e4ab9
A
349 /* get the options */
350 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
351 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
374ca955 352
729e4ab9
A
353 if(newProfile->checkBiDi) {
354 newProfile->bdp = ubidi_getSingleton();
355 }
374ca955 356
729e4ab9
A
357 LocalMemory<UStringPrepKey> key;
358 LocalMemory<char> keyName;
359 LocalMemory<char> keyPath;
360 if( key.allocateInsteadAndReset() == NULL ||
361 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
362 (path != NULL &&
363 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
364 ) {
365 *status = U_MEMORY_ALLOCATION_ERROR;
366 usprep_unload(newProfile.getAlias());
374ca955
A
367 return NULL;
368 }
73c04bcf 369
374ca955 370 umtx_lock(&usprepMutex);
729e4ab9
A
371 // If another thread already inserted the same key/value, refcount and cleanup our thread data
372 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
373 if(profile != NULL) {
374 profile->refCount++;
375 usprep_unload(newProfile.getAlias());
376 }
377 else {
378 /* initialize the key members */
379 key->name = keyName.orphan();
380 uprv_strcpy(key->name, name);
381 if(path != NULL){
382 key->path = keyPath.orphan();
383 uprv_strcpy(key->path, path);
384 }
385 profile = newProfile.orphan();
386
387 /* add the data object to the cache */
388 profile->refCount = 1;
389 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
390 }
374ca955
A
391 umtx_unlock(&usprepMutex);
392 }
374ca955
A
393
394 return profile;
395}
396
397U_CAPI UStringPrepProfile* U_EXPORT2
398usprep_open(const char* path,
399 const char* name,
400 UErrorCode* status){
401
402 if(status == NULL || U_FAILURE(*status)){
403 return NULL;
404 }
374ca955
A
405
406 /* initialize the profile struct members */
73c04bcf 407 return usprep_getProfile(path,name,status);
374ca955
A
408}
409
729e4ab9
A
410U_CAPI UStringPrepProfile* U_EXPORT2
411usprep_openByType(UStringPrepProfileType type,
412 UErrorCode* status) {
413 if(status == NULL || U_FAILURE(*status)){
414 return NULL;
415 }
416 int32_t index = (int32_t)type;
417 if (index < 0 || index >= (int32_t)(sizeof(PROFILE_NAMES)/sizeof(PROFILE_NAMES[0]))) {
418 *status = U_ILLEGAL_ARGUMENT_ERROR;
419 return NULL;
420 }
421 return usprep_open(NULL, PROFILE_NAMES[index], status);
422}
423
374ca955
A
424U_CAPI void U_EXPORT2
425usprep_close(UStringPrepProfile* profile){
426 if(profile==NULL){
427 return;
428 }
429
430 umtx_lock(&usprepMutex);
431 /* decrement the ref count*/
432 if(profile->refCount > 0){
433 profile->refCount--;
434 }
435 umtx_unlock(&usprepMutex);
436
437}
438
439U_CFUNC void
440uprv_syntaxError(const UChar* rules,
441 int32_t pos,
442 int32_t rulesLen,
443 UParseError* parseError){
444 if(parseError == NULL){
445 return;
446 }
447 parseError->offset = pos;
448 parseError->line = 0 ; // we are not using line numbers
449
450 // for pre-context
46f4442e 451 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
374ca955
A
452 int32_t limit = pos;
453
454 u_memcpy(parseError->preContext,rules+start,limit-start);
455 //null terminate the buffer
456 parseError->preContext[limit-start] = 0;
457
458 // for post-context; include error rules[pos]
459 start = pos;
460 limit = start + (U_PARSE_CONTEXT_LEN-1);
461 if (limit > rulesLen) {
462 limit = rulesLen;
463 }
464 if (start < rulesLen) {
465 u_memcpy(parseError->postContext,rules+start,limit-start);
466 }
467 //null terminate the buffer
468 parseError->postContext[limit-start]= 0;
469}
470
471
472static inline UStringPrepType
473getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
474
475 UStringPrepType type;
476 if(trieWord == 0){
477 /*
478 * Initial value stored in the mapping table
479 * just return USPREP_TYPE_LIMIT .. so that
480 * the source codepoint is copied to the destination
481 */
482 type = USPREP_TYPE_LIMIT;
73c04bcf
A
483 isIndex =FALSE;
484 value = 0;
374ca955
A
485 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
486 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
73c04bcf
A
487 isIndex =FALSE;
488 value = 0;
374ca955
A
489 }else{
490 /* get the type */
491 type = USPREP_MAP;
492 /* ascertain if the value is index or delta */
493 if(trieWord & 0x02){
494 isIndex = TRUE;
495 value = trieWord >> 2; //mask off the lower 2 bits and shift
374ca955
A
496 }else{
497 isIndex = FALSE;
498 value = (int16_t)trieWord;
499 value = (value >> 2);
374ca955
A
500 }
501
502 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
503 type = USPREP_DELETE;
504 isIndex =FALSE;
505 value = 0;
506 }
507 }
508 return type;
509}
510
511
512
513static int32_t
514usprep_map( const UStringPrepProfile* profile,
515 const UChar* src, int32_t srcLength,
516 UChar* dest, int32_t destCapacity,
517 int32_t options,
518 UParseError* parseError,
519 UErrorCode* status ){
520
521 uint16_t result;
522 int32_t destIndex=0;
523 int32_t srcIndex;
524 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
525 UStringPrepType type;
526 int16_t value;
527 UBool isIndex;
528 const int32_t* indexes = profile->indexes;
529
530 // no error checking the caller check for error and arguments
531 // no string length check the caller finds out the string length
532
533 for(srcIndex=0;srcIndex<srcLength;){
534 UChar32 ch;
535
536 U16_NEXT(src,srcIndex,srcLength,ch);
537
538 result=0;
539
540 UTRIE_GET16(&profile->sprepTrie,ch,result);
541
542 type = getValues(result, value, isIndex);
543
544 // check if the source codepoint is unassigned
545 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
546
547 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
548 *status = U_STRINGPREP_UNASSIGNED_ERROR;
549 return 0;
550
551 }else if(type == USPREP_MAP){
552
553 int32_t index, length;
554
555 if(isIndex){
556 index = value;
557 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
558 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
559 length = 1;
560 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
561 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
562 length = 2;
563 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
564 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
565 length = 3;
566 }else{
567 length = profile->mappingData[index++];
568
569 }
570
571 /* copy mapping to destination */
572 for(int32_t i=0; i< length; i++){
573 if(destIndex < destCapacity ){
574 dest[destIndex] = profile->mappingData[index+i];
575 }
576 destIndex++; /* for pre-flighting */
577 }
578 continue;
579 }else{
580 // subtract the delta to arrive at the code point
581 ch -= value;
582 }
583
584 }else if(type==USPREP_DELETE){
585 // just consume the codepoint and contine
586 continue;
587 }
588 //copy the code point into destination
589 if(ch <= 0xFFFF){
590 if(destIndex < destCapacity ){
591 dest[destIndex] = (UChar)ch;
592 }
593 destIndex++;
594 }else{
595 if(destIndex+1 < destCapacity ){
596 dest[destIndex] = U16_LEAD(ch);
597 dest[destIndex+1] = U16_TRAIL(ch);
598 }
599 destIndex +=2;
600 }
601
602 }
603
604 return u_terminateUChars(dest, destCapacity, destIndex, status);
605}
606
607
608static int32_t
609usprep_normalize( const UChar* src, int32_t srcLength,
610 UChar* dest, int32_t destCapacity,
611 UErrorCode* status ){
374ca955
A
612 return unorm_normalize(
613 src, srcLength,
729e4ab9 614 UNORM_NFKC, UNORM_UNICODE_3_2,
374ca955
A
615 dest, destCapacity,
616 status);
617}
618
619
620 /*
621 1) Map -- For each character in the input, check if it has a mapping
622 and, if so, replace it with its mapping.
623
624 2) Normalize -- Possibly normalize the result of step 1 using Unicode
625 normalization.
626
627 3) Prohibit -- Check for any characters that are not allowed in the
628 output. If any are found, return an error.
629
630 4) Check bidi -- Possibly check for right-to-left characters, and if
631 any are found, make sure that the whole string satisfies the
632 requirements for bidirectional strings. If the string does not
633 satisfy the requirements for bidirectional strings, return an
634 error.
635 [Unicode3.2] defines several bidirectional categories; each character
636 has one bidirectional category assigned to it. For the purposes of
637 the requirements below, an "RandALCat character" is a character that
638 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
639 is a character that has Unicode bidirectional category "L". Note
640
641
642 that there are many characters which fall in neither of the above
643 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
644 this because they have bidirectional category "EN".
645
646 In any profile that specifies bidirectional character handling, all
647 three of the following requirements MUST be met:
648
649 1) The characters in section 5.8 MUST be prohibited.
650
651 2) If a string contains any RandALCat character, the string MUST NOT
652 contain any LCat character.
653
654 3) If a string contains any RandALCat character, a RandALCat
655 character MUST be the first character of the string, and a
656 RandALCat character MUST be the last character of the string.
657*/
658
659#define MAX_STACK_BUFFER_SIZE 300
660
661
662U_CAPI int32_t U_EXPORT2
663usprep_prepare( const UStringPrepProfile* profile,
664 const UChar* src, int32_t srcLength,
665 UChar* dest, int32_t destCapacity,
666 int32_t options,
667 UParseError* parseError,
668 UErrorCode* status ){
669
670 // check error status
671 if(status == NULL || U_FAILURE(*status)){
672 return 0;
673 }
674
675 //check arguments
676 if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
677 *status=U_ILLEGAL_ARGUMENT_ERROR;
678 return 0;
679 }
680
681 UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
682 UChar *b1 = b1Stack, *b2 = b2Stack;
683 int32_t b1Len, b2Len=0,
684 b1Capacity = MAX_STACK_BUFFER_SIZE ,
685 b2Capacity = MAX_STACK_BUFFER_SIZE;
686 uint16_t result;
687 int32_t b2Index = 0;
688 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
689 UBool leftToRight=FALSE, rightToLeft=FALSE;
690 int32_t rtlPos =-1, ltrPos =-1;
691
692 //get the string length
693 if(srcLength == -1){
694 srcLength = u_strlen(src);
695 }
696 // map
697 b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
698
699 if(*status == U_BUFFER_OVERFLOW_ERROR){
700 // redo processing of string
701 /* we do not have enough room so grow the buffer*/
702 b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
703 if(b1==NULL){
704 *status = U_MEMORY_ALLOCATION_ERROR;
705 goto CLEANUP;
706 }
707
708 *status = U_ZERO_ERROR; // reset error
709
710 b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
711
712 }
713
714 // normalize
715 if(profile->doNFKC == TRUE){
716 b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
717
718 if(*status == U_BUFFER_OVERFLOW_ERROR){
719 // redo processing of string
720 /* we do not have enough room so grow the buffer*/
721 b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
722 if(b2==NULL){
723 *status = U_MEMORY_ALLOCATION_ERROR;
724 goto CLEANUP;
725 }
726
727 *status = U_ZERO_ERROR; // reset error
728
729 b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
730
731 }
732
733 }else{
734 b2 = b1;
735 b2Len = b1Len;
736 }
737
738
739 if(U_FAILURE(*status)){
740 goto CLEANUP;
741 }
742
743 UChar32 ch;
744 UStringPrepType type;
745 int16_t value;
746 UBool isIndex;
747
748 // Prohibit and checkBiDi in one pass
749 for(b2Index=0; b2Index<b2Len;){
750
751 ch = 0;
752
753 U16_NEXT(b2, b2Index, b2Len, ch);
754
755 UTRIE_GET16(&profile->sprepTrie,ch,result);
756
757 type = getValues(result, value, isIndex);
758
759 if( type == USPREP_PROHIBITED ||
760 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
761 ){
762 *status = U_STRINGPREP_PROHIBITED_ERROR;
763 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
764 goto CLEANUP;
765 }
766
73c04bcf
A
767 if(profile->checkBiDi) {
768 direction = ubidi_getClass(profile->bdp, ch);
769 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
770 firstCharDir = direction;
771 }
772 if(direction == U_LEFT_TO_RIGHT){
773 leftToRight = TRUE;
774 ltrPos = b2Index-1;
775 }
776 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
777 rightToLeft = TRUE;
778 rtlPos = b2Index-1;
779 }
374ca955
A
780 }
781 }
782 if(profile->checkBiDi == TRUE){
783 // satisfy 2
784 if( leftToRight == TRUE && rightToLeft == TRUE){
785 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
786 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
787 goto CLEANUP;
788 }
789
790 //satisfy 3
791 if( rightToLeft == TRUE &&
792 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
793 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
794 ){
795 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
796 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
797 return FALSE;
798 }
799 }
73c04bcf 800 if(b2Len>0 && b2Len <= destCapacity){
374ca955
A
801 uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
802 }
803
804CLEANUP:
805 if(b1!=b1Stack){
806 uprv_free(b1);
807 b1=NULL;
808 }
809
810 if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
811 uprv_free(b2);
812 b2=NULL;
813 }
814 return u_terminateUChars(dest, destCapacity, b2Len, status);
815}
816
817
818/* data swapping ------------------------------------------------------------ */
819
820U_CAPI int32_t U_EXPORT2
821usprep_swap(const UDataSwapper *ds,
822 const void *inData, int32_t length, void *outData,
823 UErrorCode *pErrorCode) {
824 const UDataInfo *pInfo;
825 int32_t headerSize;
826
827 const uint8_t *inBytes;
828 uint8_t *outBytes;
829
830 const int32_t *inIndexes;
831 int32_t indexes[16];
832
833 int32_t i, offset, count, size;
834
835 /* udata_swapDataHeader checks the arguments */
836 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
837 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
838 return 0;
839 }
840
841 /* check data format and format version */
842 pInfo=(const UDataInfo *)((const char *)inData+4);
843 if(!(
844 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
845 pInfo->dataFormat[1]==0x50 &&
846 pInfo->dataFormat[2]==0x52 &&
847 pInfo->dataFormat[3]==0x50 &&
848 pInfo->formatVersion[0]==3
849 )) {
850 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
851 pInfo->dataFormat[0], pInfo->dataFormat[1],
852 pInfo->dataFormat[2], pInfo->dataFormat[3],
853 pInfo->formatVersion[0]);
854 *pErrorCode=U_UNSUPPORTED_ERROR;
855 return 0;
856 }
857
858 inBytes=(const uint8_t *)inData+headerSize;
859 outBytes=(uint8_t *)outData+headerSize;
860
861 inIndexes=(const int32_t *)inBytes;
862
863 if(length>=0) {
864 length-=headerSize;
865 if(length<16*4) {
866 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
867 length);
868 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
869 return 0;
870 }
871 }
872
873 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
874 for(i=0; i<16; ++i) {
875 indexes[i]=udata_readInt32(ds, inIndexes[i]);
876 }
877
878 /* calculate the total length of the data */
879 size=
880 16*4+ /* size of indexes[] */
881 indexes[_SPREP_INDEX_TRIE_SIZE]+
882 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
883
884 if(length>=0) {
885 if(length<size) {
886 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
887 length);
888 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
889 return 0;
890 }
891
892 /* copy the data for inaccessible bytes */
893 if(inBytes!=outBytes) {
894 uprv_memcpy(outBytes, inBytes, size);
895 }
896
897 offset=0;
898
899 /* swap the int32_t indexes[] */
900 count=16*4;
901 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
902 offset+=count;
903
904 /* swap the UTrie */
905 count=indexes[_SPREP_INDEX_TRIE_SIZE];
906 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
907 offset+=count;
908
909 /* swap the uint16_t mappingTable[] */
910 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
911 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
912 offset+=count;
913 }
914
915 return headerSize+size;
916}
917
918#endif /* #if !UCONFIG_NO_IDNA */