]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/usprep.cpp
ICU-59117.0.1.tar.gz
[apple/icu.git] / icuSources / common / usprep.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
374ca955
A
3/*
4 *******************************************************************************
5 *
2ca993e8 6 * Copyright (C) 2003-2016, International Business Machines
374ca955
A
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: usprep.cpp
f3c0d7a5 11 * encoding: UTF-8
374ca955
A
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003jul2
16 * created by: Ram Viswanadha
17 */
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_IDNA
22
23#include "unicode/usprep.h"
24
b331163b 25#include "unicode/normalizer2.h"
374ca955
A
26#include "unicode/ustring.h"
27#include "unicode/uchar.h"
28#include "unicode/uversion.h"
29#include "umutex.h"
30#include "cmemory.h"
31#include "sprpimpl.h"
32#include "ustr_imp.h"
33#include "uhash.h"
34#include "cstring.h"
35#include "udataswp.h"
36#include "ucln_cmn.h"
73c04bcf 37#include "ubidi_props.h"
b331163b 38#include "uprops.h"
374ca955 39
729e4ab9
A
40U_NAMESPACE_USE
41
374ca955
A
42U_CDECL_BEGIN
43
44/*
45Static cache for already opened StringPrep profiles
46*/
47static UHashtable *SHARED_DATA_HASHTABLE = NULL;
57a6839d 48static icu::UInitOnce gSharedDataInitOnce;
374ca955 49
51004dcb 50static UMutex usprepMutex = U_MUTEX_INITIALIZER;
374ca955
A
51
52/* format version of spp file */
51004dcb 53//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
374ca955
A
54
55/* the Unicode version of the sprep data */
56static UVersionInfo dataVersion={ 0, 0, 0, 0 };
57
/*
 * Data file base names, indexed by UStringPrepProfileType.
 * The order of the entries must stay aligned with that enum; several
 * profile types intentionally share the same data file.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
75
374ca955
A
76static UBool U_CALLCONV
77isSPrepAcceptable(void * /* context */,
78 const char * /* type */,
79 const char * /* name */,
80 const UDataInfo *pInfo) {
81 if(
82 pInfo->size>=20 &&
83 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
84 pInfo->charsetFamily==U_CHARSET_FAMILY &&
85 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
86 pInfo->dataFormat[1]==0x50 &&
87 pInfo->dataFormat[2]==0x52 &&
88 pInfo->dataFormat[3]==0x50 &&
89 pInfo->formatVersion[0]==3 &&
90 pInfo->formatVersion[2]==UTRIE_SHIFT &&
91 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
92 ) {
51004dcb 93 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
374ca955
A
94 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
95 return TRUE;
96 } else {
97 return FALSE;
98 }
99}
100
101static int32_t U_CALLCONV
102getSPrepFoldingOffset(uint32_t data) {
103
104 return (int32_t)data;
105
106}
107
108/* hashes an entry */
73c04bcf 109static int32_t U_CALLCONV
374ca955
A
110hashEntry(const UHashTok parm) {
111 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
112 UHashTok namekey, pathkey;
113 namekey.pointer = b->name;
114 pathkey.pointer = b->path;
115 return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey);
116}
117
118/* compares two entries */
73c04bcf 119static UBool U_CALLCONV
374ca955
A
120compareEntries(const UHashTok p1, const UHashTok p2) {
121 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
122 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
123 UHashTok name1, name2, path1, path2;
124 name1.pointer = b1->name;
125 name2.pointer = b2->name;
126 path1.pointer = b1->path;
127 path2.pointer = b2->path;
128 return ((UBool)(uhash_compareChars(name1, name2) &
129 uhash_compareChars(path1, path2)));
130}
131
132static void
133usprep_unload(UStringPrepProfile* data){
134 udata_close(data->sprepData);
135}
136
137static int32_t
138usprep_internal_flushCache(UBool noRefCount){
139 UStringPrepProfile *profile = NULL;
140 UStringPrepKey *key = NULL;
b331163b 141 int32_t pos = UHASH_FIRST;
374ca955
A
142 int32_t deletedNum = 0;
143 const UHashElement *e;
144
145 /*
146 * if shared data hasn't even been lazy evaluated yet
147 * return 0
148 */
149 umtx_lock(&usprepMutex);
150 if (SHARED_DATA_HASHTABLE == NULL) {
151 umtx_unlock(&usprepMutex);
152 return 0;
153 }
154
155 /*creates an enumeration to iterate through every element in the table */
156 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
157 {
158 profile = (UStringPrepProfile *) e->value.pointer;
159 key = (UStringPrepKey *) e->key.pointer;
160
161 if ((noRefCount== FALSE && profile->refCount == 0) ||
162 noRefCount== TRUE) {
163 deletedNum++;
164 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
165
166 /* unload the data */
167 usprep_unload(profile);
168
169 if(key->name != NULL) {
170 uprv_free(key->name);
171 key->name=NULL;
172 }
173 if(key->path != NULL) {
174 uprv_free(key->path);
175 key->path=NULL;
176 }
177 uprv_free(profile);
178 uprv_free(key);
179 }
180
181 }
182 umtx_unlock(&usprepMutex);
183
184 return deletedNum;
185}
186
187/* Works just like ucnv_flushCache()
188static int32_t
189usprep_flushCache(){
190 return usprep_internal_flushCache(FALSE);
191}
192*/
193
194static UBool U_CALLCONV usprep_cleanup(void){
195 if (SHARED_DATA_HASHTABLE != NULL) {
196 usprep_internal_flushCache(TRUE);
197 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
198 uhash_close(SHARED_DATA_HASHTABLE);
199 SHARED_DATA_HASHTABLE = NULL;
200 }
201 }
57a6839d 202 gSharedDataInitOnce.reset();
374ca955
A
203 return (SHARED_DATA_HASHTABLE == NULL);
204}
205U_CDECL_END
206
374ca955
A
207
208/** Initializes the cache for resources */
57a6839d
A
209static void U_CALLCONV
210createCache(UErrorCode &status) {
211 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
212 if (U_FAILURE(status)) {
213 SHARED_DATA_HASHTABLE = NULL;
214 }
215 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
216}
217
374ca955
A
218static void
219initCache(UErrorCode *status) {
57a6839d 220 umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
374ca955
A
221}
222
223static UBool U_CALLCONV
224loadData(UStringPrepProfile* profile,
225 const char* path,
226 const char* name,
227 const char* type,
228 UErrorCode* errorCode) {
229 /* load Unicode SPREP data from file */
230 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
231 UDataMemory *dataMemory;
232 const int32_t *p=NULL;
233 const uint8_t *pb;
234 UVersionInfo normUnicodeVersion;
235 int32_t normUniVer, sprepUniVer, normCorrVer;
236
237 if(errorCode==NULL || U_FAILURE(*errorCode)) {
238 return 0;
239 }
240
241 /* open the data outside the mutex block */
242 //TODO: change the path
243 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
244 if(U_FAILURE(*errorCode)) {
245 return FALSE;
246 }
247
248 p=(const int32_t *)udata_getMemory(dataMemory);
249 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
250 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
251 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
252
253
254 if(U_FAILURE(*errorCode)) {
255 udata_close(dataMemory);
256 return FALSE;
257 }
258
259 /* in the mutex block, set the data for this process */
260 umtx_lock(&usprepMutex);
261 if(profile->sprepData==NULL) {
262 profile->sprepData=dataMemory;
263 dataMemory=NULL;
264 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
265 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
266 } else {
267 p=(const int32_t *)udata_getMemory(profile->sprepData);
268 }
269 umtx_unlock(&usprepMutex);
270 /* initialize some variables */
271 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
272
729e4ab9 273 u_getUnicodeVersion(normUnicodeVersion);
374ca955
A
274 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
275 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
276 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
277 (dataVersion[2] << 8 ) + (dataVersion[3]);
278 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
279
280 if(U_FAILURE(*errorCode)){
281 udata_close(dataMemory);
282 return FALSE;
283 }
284 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
285 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
286 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
287 ){
288 *errorCode = U_INVALID_FORMAT_ERROR;
289 udata_close(dataMemory);
290 return FALSE;
291 }
292 profile->isDataLoaded = TRUE;
293
294 /* if a different thread set it first, then close the extra data */
295 if(dataMemory!=NULL) {
296 udata_close(dataMemory); /* NULL if it was set correctly */
297 }
298
299
300 return profile->isDataLoaded;
301}
302
303static UStringPrepProfile*
304usprep_getProfile(const char* path,
305 const char* name,
306 UErrorCode *status){
307
308 UStringPrepProfile* profile = NULL;
309
310 initCache(status);
311
312 if(U_FAILURE(*status)){
313 return NULL;
314 }
315
316 UStringPrepKey stackKey;
317 /*
318 * const is cast way to save malloc, strcpy and free calls
319 * we use the passed in pointers for fetching the data from the
320 * hash table which is safe
321 */
322 stackKey.name = (char*) name;
323 stackKey.path = (char*) path;
324
325 /* fetch the data from the cache */
73c04bcf 326 umtx_lock(&usprepMutex);
374ca955 327 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
729e4ab9
A
328 if(profile != NULL) {
329 profile->refCount++;
330 }
73c04bcf 331 umtx_unlock(&usprepMutex);
374ca955 332
729e4ab9 333 if(profile == NULL) {
374ca955 334 /* else load the data and put the data in the cache */
729e4ab9
A
335 LocalMemory<UStringPrepProfile> newProfile;
336 if(newProfile.allocateInsteadAndReset() == NULL) {
374ca955 337 *status = U_MEMORY_ALLOCATION_ERROR;
374ca955
A
338 return NULL;
339 }
340
729e4ab9
A
341 /* load the data */
342 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
374ca955
A
343 return NULL;
344 }
345
729e4ab9
A
346 /* get the options */
347 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
348 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
374ca955 349
729e4ab9
A
350 if(newProfile->checkBiDi) {
351 newProfile->bdp = ubidi_getSingleton();
352 }
374ca955 353
729e4ab9
A
354 LocalMemory<UStringPrepKey> key;
355 LocalMemory<char> keyName;
356 LocalMemory<char> keyPath;
357 if( key.allocateInsteadAndReset() == NULL ||
358 keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
359 (path != NULL &&
360 keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
361 ) {
362 *status = U_MEMORY_ALLOCATION_ERROR;
363 usprep_unload(newProfile.getAlias());
374ca955
A
364 return NULL;
365 }
73c04bcf 366
374ca955 367 umtx_lock(&usprepMutex);
729e4ab9
A
368 // If another thread already inserted the same key/value, refcount and cleanup our thread data
369 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
370 if(profile != NULL) {
371 profile->refCount++;
372 usprep_unload(newProfile.getAlias());
373 }
374 else {
375 /* initialize the key members */
376 key->name = keyName.orphan();
377 uprv_strcpy(key->name, name);
378 if(path != NULL){
379 key->path = keyPath.orphan();
380 uprv_strcpy(key->path, path);
381 }
382 profile = newProfile.orphan();
383
384 /* add the data object to the cache */
385 profile->refCount = 1;
386 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
387 }
374ca955
A
388 umtx_unlock(&usprepMutex);
389 }
374ca955
A
390
391 return profile;
392}
393
394U_CAPI UStringPrepProfile* U_EXPORT2
395usprep_open(const char* path,
396 const char* name,
397 UErrorCode* status){
398
399 if(status == NULL || U_FAILURE(*status)){
400 return NULL;
401 }
374ca955
A
402
403 /* initialize the profile struct members */
73c04bcf 404 return usprep_getProfile(path,name,status);
374ca955
A
405}
406
729e4ab9
A
407U_CAPI UStringPrepProfile* U_EXPORT2
408usprep_openByType(UStringPrepProfileType type,
409 UErrorCode* status) {
410 if(status == NULL || U_FAILURE(*status)){
411 return NULL;
412 }
413 int32_t index = (int32_t)type;
2ca993e8 414 if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
729e4ab9
A
415 *status = U_ILLEGAL_ARGUMENT_ERROR;
416 return NULL;
417 }
418 return usprep_open(NULL, PROFILE_NAMES[index], status);
419}
420
374ca955
A
421U_CAPI void U_EXPORT2
422usprep_close(UStringPrepProfile* profile){
423 if(profile==NULL){
424 return;
425 }
426
427 umtx_lock(&usprepMutex);
428 /* decrement the ref count*/
429 if(profile->refCount > 0){
430 profile->refCount--;
431 }
432 umtx_unlock(&usprepMutex);
433
434}
435
436U_CFUNC void
437uprv_syntaxError(const UChar* rules,
438 int32_t pos,
439 int32_t rulesLen,
440 UParseError* parseError){
441 if(parseError == NULL){
442 return;
443 }
444 parseError->offset = pos;
445 parseError->line = 0 ; // we are not using line numbers
446
447 // for pre-context
46f4442e 448 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
374ca955
A
449 int32_t limit = pos;
450
451 u_memcpy(parseError->preContext,rules+start,limit-start);
452 //null terminate the buffer
453 parseError->preContext[limit-start] = 0;
454
455 // for post-context; include error rules[pos]
456 start = pos;
457 limit = start + (U_PARSE_CONTEXT_LEN-1);
458 if (limit > rulesLen) {
459 limit = rulesLen;
460 }
461 if (start < rulesLen) {
462 u_memcpy(parseError->postContext,rules+start,limit-start);
463 }
464 //null terminate the buffer
465 parseError->postContext[limit-start]= 0;
466}
467
468
469static inline UStringPrepType
470getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
471
472 UStringPrepType type;
473 if(trieWord == 0){
474 /*
475 * Initial value stored in the mapping table
476 * just return USPREP_TYPE_LIMIT .. so that
477 * the source codepoint is copied to the destination
478 */
479 type = USPREP_TYPE_LIMIT;
73c04bcf
A
480 isIndex =FALSE;
481 value = 0;
374ca955
A
482 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
483 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
73c04bcf
A
484 isIndex =FALSE;
485 value = 0;
374ca955
A
486 }else{
487 /* get the type */
488 type = USPREP_MAP;
489 /* ascertain if the value is index or delta */
490 if(trieWord & 0x02){
491 isIndex = TRUE;
492 value = trieWord >> 2; //mask off the lower 2 bits and shift
374ca955
A
493 }else{
494 isIndex = FALSE;
495 value = (int16_t)trieWord;
496 value = (value >> 2);
374ca955
A
497 }
498
499 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
500 type = USPREP_DELETE;
501 isIndex =FALSE;
502 value = 0;
503 }
504 }
505 return type;
506}
507
b331163b 508// TODO: change to writing to UnicodeString not UChar *
374ca955
A
509static int32_t
510usprep_map( const UStringPrepProfile* profile,
511 const UChar* src, int32_t srcLength,
512 UChar* dest, int32_t destCapacity,
513 int32_t options,
514 UParseError* parseError,
515 UErrorCode* status ){
516
517 uint16_t result;
518 int32_t destIndex=0;
519 int32_t srcIndex;
520 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
521 UStringPrepType type;
522 int16_t value;
523 UBool isIndex;
524 const int32_t* indexes = profile->indexes;
525
526 // no error checking the caller check for error and arguments
527 // no string length check the caller finds out the string length
528
529 for(srcIndex=0;srcIndex<srcLength;){
530 UChar32 ch;
531
532 U16_NEXT(src,srcIndex,srcLength,ch);
533
534 result=0;
535
536 UTRIE_GET16(&profile->sprepTrie,ch,result);
537
538 type = getValues(result, value, isIndex);
539
540 // check if the source codepoint is unassigned
541 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
542
543 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
544 *status = U_STRINGPREP_UNASSIGNED_ERROR;
545 return 0;
546
547 }else if(type == USPREP_MAP){
548
549 int32_t index, length;
550
551 if(isIndex){
552 index = value;
553 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
554 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
555 length = 1;
556 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
557 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
558 length = 2;
559 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
560 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
561 length = 3;
562 }else{
563 length = profile->mappingData[index++];
564
565 }
566
567 /* copy mapping to destination */
568 for(int32_t i=0; i< length; i++){
569 if(destIndex < destCapacity ){
570 dest[destIndex] = profile->mappingData[index+i];
571 }
572 destIndex++; /* for pre-flighting */
573 }
574 continue;
575 }else{
576 // subtract the delta to arrive at the code point
577 ch -= value;
578 }
579
580 }else if(type==USPREP_DELETE){
581 // just consume the codepoint and contine
582 continue;
583 }
584 //copy the code point into destination
585 if(ch <= 0xFFFF){
586 if(destIndex < destCapacity ){
587 dest[destIndex] = (UChar)ch;
588 }
589 destIndex++;
590 }else{
591 if(destIndex+1 < destCapacity ){
592 dest[destIndex] = U16_LEAD(ch);
593 dest[destIndex+1] = U16_TRAIL(ch);
594 }
595 destIndex +=2;
596 }
597
598 }
599
600 return u_terminateUChars(dest, destCapacity, destIndex, status);
601}
602
b331163b 603/*
374ca955
A
604 1) Map -- For each character in the input, check if it has a mapping
605 and, if so, replace it with its mapping.
606
607 2) Normalize -- Possibly normalize the result of step 1 using Unicode
608 normalization.
609
610 3) Prohibit -- Check for any characters that are not allowed in the
611 output. If any are found, return an error.
612
613 4) Check bidi -- Possibly check for right-to-left characters, and if
614 any are found, make sure that the whole string satisfies the
615 requirements for bidirectional strings. If the string does not
616 satisfy the requirements for bidirectional strings, return an
617 error.
618 [Unicode3.2] defines several bidirectional categories; each character
619 has one bidirectional category assigned to it. For the purposes of
620 the requirements below, an "RandALCat character" is a character that
621 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
622 is a character that has Unicode bidirectional category "L". Note
623
624
625 that there are many characters which fall in neither of the above
626 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
627 this because they have bidirectional category "EN".
628
629 In any profile that specifies bidirectional character handling, all
630 three of the following requirements MUST be met:
631
632 1) The characters in section 5.8 MUST be prohibited.
633
634 2) If a string contains any RandALCat character, the string MUST NOT
635 contain any LCat character.
636
637 3) If a string contains any RandALCat character, a RandALCat
638 character MUST be the first character of the string, and a
639 RandALCat character MUST be the last character of the string.
640*/
374ca955
A
641U_CAPI int32_t U_EXPORT2
642usprep_prepare( const UStringPrepProfile* profile,
643 const UChar* src, int32_t srcLength,
644 UChar* dest, int32_t destCapacity,
645 int32_t options,
646 UParseError* parseError,
647 UErrorCode* status ){
648
649 // check error status
b331163b 650 if(U_FAILURE(*status)){
374ca955
A
651 return 0;
652 }
b331163b 653
374ca955 654 //check arguments
b331163b
A
655 if(profile==NULL ||
656 (src==NULL ? srcLength!=0 : srcLength<-1) ||
657 (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
374ca955
A
658 *status=U_ILLEGAL_ARGUMENT_ERROR;
659 return 0;
660 }
661
374ca955 662 //get the string length
b331163b 663 if(srcLength < 0){
374ca955
A
664 srcLength = u_strlen(src);
665 }
666 // map
b331163b
A
667 UnicodeString s1;
668 UChar *b1 = s1.getBuffer(srcLength);
669 if(b1==NULL){
670 *status = U_MEMORY_ALLOCATION_ERROR;
671 return 0;
672 }
673 int32_t b1Len = usprep_map(profile, src, srcLength,
674 b1, s1.getCapacity(), options, parseError, status);
675 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
374ca955
A
676
677 if(*status == U_BUFFER_OVERFLOW_ERROR){
678 // redo processing of string
679 /* we do not have enough room so grow the buffer*/
b331163b 680 b1 = s1.getBuffer(b1Len);
374ca955
A
681 if(b1==NULL){
682 *status = U_MEMORY_ALLOCATION_ERROR;
b331163b 683 return 0;
374ca955
A
684 }
685
686 *status = U_ZERO_ERROR; // reset error
b331163b
A
687 b1Len = usprep_map(profile, src, srcLength,
688 b1, s1.getCapacity(), options, parseError, status);
689 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
690 }
691 if(U_FAILURE(*status)){
692 return 0;
374ca955
A
693 }
694
695 // normalize
b331163b
A
696 UnicodeString s2;
697 if(profile->doNFKC){
698 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
699 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
700 if(U_FAILURE(*status)){
701 return 0;
374ca955 702 }
b331163b 703 fn2.normalize(s1, s2, *status);
374ca955 704 }else{
b331163b 705 s2.fastCopyFrom(s1);
374ca955 706 }
374ca955 707 if(U_FAILURE(*status)){
b331163b 708 return 0;
374ca955
A
709 }
710
374ca955 711 // Prohibit and checkBiDi in one pass
b331163b
A
712 const UChar *b2 = s2.getBuffer();
713 int32_t b2Len = s2.length();
714 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
715 UBool leftToRight=FALSE, rightToLeft=FALSE;
716 int32_t rtlPos =-1, ltrPos =-1;
374ca955 717
b331163b
A
718 for(int32_t b2Index=0; b2Index<b2Len;){
719 UChar32 ch = 0;
374ca955
A
720 U16_NEXT(b2, b2Index, b2Len, ch);
721
b331163b 722 uint16_t result;
374ca955 723 UTRIE_GET16(&profile->sprepTrie,ch,result);
b331163b
A
724
725 int16_t value;
726 UBool isIndex;
727 UStringPrepType type = getValues(result, value, isIndex);
374ca955
A
728
729 if( type == USPREP_PROHIBITED ||
730 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
731 ){
732 *status = U_STRINGPREP_PROHIBITED_ERROR;
733 uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
b331163b 734 return 0;
374ca955
A
735 }
736
73c04bcf
A
737 if(profile->checkBiDi) {
738 direction = ubidi_getClass(profile->bdp, ch);
739 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
740 firstCharDir = direction;
741 }
742 if(direction == U_LEFT_TO_RIGHT){
743 leftToRight = TRUE;
744 ltrPos = b2Index-1;
745 }
746 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
747 rightToLeft = TRUE;
748 rtlPos = b2Index-1;
749 }
374ca955 750 }
b331163b 751 }
374ca955
A
752 if(profile->checkBiDi == TRUE){
753 // satisfy 2
754 if( leftToRight == TRUE && rightToLeft == TRUE){
755 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
756 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
b331163b 757 return 0;
374ca955
A
758 }
759
760 //satisfy 3
761 if( rightToLeft == TRUE &&
762 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
763 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
764 ){
765 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
766 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
767 return FALSE;
768 }
769 }
b331163b 770 return s2.extract(dest, destCapacity, *status);
374ca955
A
771}
772
773
774/* data swapping ------------------------------------------------------------ */
775
776U_CAPI int32_t U_EXPORT2
777usprep_swap(const UDataSwapper *ds,
778 const void *inData, int32_t length, void *outData,
779 UErrorCode *pErrorCode) {
780 const UDataInfo *pInfo;
781 int32_t headerSize;
782
783 const uint8_t *inBytes;
784 uint8_t *outBytes;
785
786 const int32_t *inIndexes;
787 int32_t indexes[16];
788
789 int32_t i, offset, count, size;
790
791 /* udata_swapDataHeader checks the arguments */
792 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
793 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
794 return 0;
795 }
796
797 /* check data format and format version */
798 pInfo=(const UDataInfo *)((const char *)inData+4);
799 if(!(
800 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
801 pInfo->dataFormat[1]==0x50 &&
802 pInfo->dataFormat[2]==0x52 &&
803 pInfo->dataFormat[3]==0x50 &&
804 pInfo->formatVersion[0]==3
805 )) {
806 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
807 pInfo->dataFormat[0], pInfo->dataFormat[1],
808 pInfo->dataFormat[2], pInfo->dataFormat[3],
809 pInfo->formatVersion[0]);
810 *pErrorCode=U_UNSUPPORTED_ERROR;
811 return 0;
812 }
813
814 inBytes=(const uint8_t *)inData+headerSize;
815 outBytes=(uint8_t *)outData+headerSize;
816
817 inIndexes=(const int32_t *)inBytes;
818
819 if(length>=0) {
820 length-=headerSize;
821 if(length<16*4) {
822 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
823 length);
824 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
825 return 0;
826 }
827 }
828
829 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
830 for(i=0; i<16; ++i) {
831 indexes[i]=udata_readInt32(ds, inIndexes[i]);
832 }
833
834 /* calculate the total length of the data */
835 size=
836 16*4+ /* size of indexes[] */
837 indexes[_SPREP_INDEX_TRIE_SIZE]+
838 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
839
840 if(length>=0) {
841 if(length<size) {
842 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
843 length);
844 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
845 return 0;
846 }
847
848 /* copy the data for inaccessible bytes */
849 if(inBytes!=outBytes) {
850 uprv_memcpy(outBytes, inBytes, size);
851 }
852
853 offset=0;
854
855 /* swap the int32_t indexes[] */
856 count=16*4;
857 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
858 offset+=count;
859
860 /* swap the UTrie */
861 count=indexes[_SPREP_INDEX_TRIE_SIZE];
862 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
863 offset+=count;
864
865 /* swap the uint16_t mappingTable[] */
866 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
867 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
b331163b 868 //offset+=count;
374ca955
A
869 }
870
871 return headerSize+size;
872}
873
874#endif /* #if !UCONFIG_NO_IDNA */