]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/usprep.cpp
ICU-64243.0.1.tar.gz
[apple/icu.git] / icuSources / common / usprep.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
374ca955
A
3/*
4 *******************************************************************************
5 *
2ca993e8 6 * Copyright (C) 2003-2016, International Business Machines
374ca955
A
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: usprep.cpp
f3c0d7a5 11 * encoding: UTF-8
374ca955
A
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003jul2
16 * created by: Ram Viswanadha
17 */
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_IDNA
22
23#include "unicode/usprep.h"
24
b331163b 25#include "unicode/normalizer2.h"
374ca955
A
26#include "unicode/ustring.h"
27#include "unicode/uchar.h"
28#include "unicode/uversion.h"
29#include "umutex.h"
30#include "cmemory.h"
31#include "sprpimpl.h"
32#include "ustr_imp.h"
33#include "uhash.h"
34#include "cstring.h"
35#include "udataswp.h"
36#include "ucln_cmn.h"
73c04bcf 37#include "ubidi_props.h"
b331163b 38#include "uprops.h"
374ca955 39
729e4ab9
A
40U_NAMESPACE_USE
41
374ca955
A
42U_CDECL_BEGIN
43
44/*
45Static cache for already opened StringPrep profiles
46*/
47static UHashtable *SHARED_DATA_HASHTABLE = NULL;
3d1f044b 48static icu::UInitOnce gSharedDataInitOnce = U_INITONCE_INITIALIZER;
374ca955 49
3d1f044b
A
50static UMutex *usprepMutex() {
51 static UMutex *m = STATIC_NEW(UMutex);
52 return m;
53}
374ca955
A
54
55/* format version of spp file */
51004dcb 56//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
374ca955
A
57
58/* the Unicode version of the sprep data */
59static UVersionInfo dataVersion={ 0, 0, 0, 0 };
60
729e4ab9 61/* Profile names must be aligned to UStringPrepProfileType */
51004dcb 62static const char * const PROFILE_NAMES[] = {
729e4ab9
A
63 "rfc3491", /* USPREP_RFC3491_NAMEPREP */
64 "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */
65 "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */
66 "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */
67 "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
68 "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
69 "rfc3722", /* USPREP_RFC3722_ISCSI */
70 "rfc3920node", /* USPREP_RFC3920_NODEPREP */
71 "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */
72 "rfc4011", /* USPREP_RFC4011_MIB */
73 "rfc4013", /* USPREP_RFC4013_SASLPREP */
74 "rfc4505", /* USPREP_RFC4505_TRACE */
75 "rfc4518", /* USPREP_RFC4518_LDAP */
76 "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */
77};
78
374ca955
A
79static UBool U_CALLCONV
80isSPrepAcceptable(void * /* context */,
81 const char * /* type */,
82 const char * /* name */,
83 const UDataInfo *pInfo) {
84 if(
85 pInfo->size>=20 &&
86 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
87 pInfo->charsetFamily==U_CHARSET_FAMILY &&
88 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
89 pInfo->dataFormat[1]==0x50 &&
90 pInfo->dataFormat[2]==0x52 &&
91 pInfo->dataFormat[3]==0x50 &&
92 pInfo->formatVersion[0]==3 &&
93 pInfo->formatVersion[2]==UTRIE_SHIFT &&
94 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
95 ) {
51004dcb 96 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
374ca955
A
97 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
98 return TRUE;
99 } else {
100 return FALSE;
101 }
102}
103
104static int32_t U_CALLCONV
105getSPrepFoldingOffset(uint32_t data) {
106
107 return (int32_t)data;
108
109}
110
111/* hashes an entry */
73c04bcf 112static int32_t U_CALLCONV
374ca955
A
113hashEntry(const UHashTok parm) {
114 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
115 UHashTok namekey, pathkey;
116 namekey.pointer = b->name;
117 pathkey.pointer = b->path;
3d1f044b
A
118 uint32_t unsignedHash = static_cast<uint32_t>(uhash_hashChars(namekey)) +
119 37u * static_cast<uint32_t>(uhash_hashChars(pathkey));
120 return static_cast<int32_t>(unsignedHash);
374ca955
A
121}
122
123/* compares two entries */
73c04bcf 124static UBool U_CALLCONV
374ca955
A
125compareEntries(const UHashTok p1, const UHashTok p2) {
126 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
127 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
128 UHashTok name1, name2, path1, path2;
129 name1.pointer = b1->name;
130 name2.pointer = b2->name;
131 path1.pointer = b1->path;
132 path2.pointer = b2->path;
133 return ((UBool)(uhash_compareChars(name1, name2) &
134 uhash_compareChars(path1, path2)));
135}
136
137static void
138usprep_unload(UStringPrepProfile* data){
139 udata_close(data->sprepData);
140}
141
142static int32_t
143usprep_internal_flushCache(UBool noRefCount){
144 UStringPrepProfile *profile = NULL;
145 UStringPrepKey *key = NULL;
b331163b 146 int32_t pos = UHASH_FIRST;
374ca955
A
147 int32_t deletedNum = 0;
148 const UHashElement *e;
149
150 /*
151 * if shared data hasn't even been lazy evaluated yet
152 * return 0
153 */
3d1f044b 154 umtx_lock(usprepMutex());
374ca955 155 if (SHARED_DATA_HASHTABLE == NULL) {
3d1f044b 156 umtx_unlock(usprepMutex());
374ca955
A
157 return 0;
158 }
159
160 /*creates an enumeration to iterate through every element in the table */
161 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
162 {
163 profile = (UStringPrepProfile *) e->value.pointer;
164 key = (UStringPrepKey *) e->key.pointer;
165
166 if ((noRefCount== FALSE && profile->refCount == 0) ||
167 noRefCount== TRUE) {
168 deletedNum++;
169 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
170
171 /* unload the data */
172 usprep_unload(profile);
173
174 if(key->name != NULL) {
175 uprv_free(key->name);
176 key->name=NULL;
177 }
178 if(key->path != NULL) {
179 uprv_free(key->path);
180 key->path=NULL;
181 }
182 uprv_free(profile);
183 uprv_free(key);
184 }
185
186 }
3d1f044b 187 umtx_unlock(usprepMutex());
374ca955
A
188
189 return deletedNum;
190}
191
192/* Works just like ucnv_flushCache()
193static int32_t
194usprep_flushCache(){
195 return usprep_internal_flushCache(FALSE);
196}
197*/
198
199static UBool U_CALLCONV usprep_cleanup(void){
200 if (SHARED_DATA_HASHTABLE != NULL) {
201 usprep_internal_flushCache(TRUE);
202 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
203 uhash_close(SHARED_DATA_HASHTABLE);
204 SHARED_DATA_HASHTABLE = NULL;
205 }
206 }
57a6839d 207 gSharedDataInitOnce.reset();
374ca955
A
208 return (SHARED_DATA_HASHTABLE == NULL);
209}
210U_CDECL_END
211
374ca955
A
212
213/** Initializes the cache for resources */
57a6839d
A
214static void U_CALLCONV
215createCache(UErrorCode &status) {
216 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
217 if (U_FAILURE(status)) {
218 SHARED_DATA_HASHTABLE = NULL;
219 }
220 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
221}
222
374ca955
A
223static void
224initCache(UErrorCode *status) {
57a6839d 225 umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
374ca955
A
226}
227
228static UBool U_CALLCONV
229loadData(UStringPrepProfile* profile,
230 const char* path,
231 const char* name,
232 const char* type,
233 UErrorCode* errorCode) {
234 /* load Unicode SPREP data from file */
235 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
236 UDataMemory *dataMemory;
237 const int32_t *p=NULL;
238 const uint8_t *pb;
239 UVersionInfo normUnicodeVersion;
240 int32_t normUniVer, sprepUniVer, normCorrVer;
241
242 if(errorCode==NULL || U_FAILURE(*errorCode)) {
243 return 0;
244 }
245
246 /* open the data outside the mutex block */
247 //TODO: change the path
248 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
249 if(U_FAILURE(*errorCode)) {
250 return FALSE;
251 }
252
253 p=(const int32_t *)udata_getMemory(dataMemory);
254 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
255 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
256 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
257
258
259 if(U_FAILURE(*errorCode)) {
260 udata_close(dataMemory);
261 return FALSE;
262 }
263
264 /* in the mutex block, set the data for this process */
3d1f044b 265 umtx_lock(usprepMutex());
374ca955
A
266 if(profile->sprepData==NULL) {
267 profile->sprepData=dataMemory;
268 dataMemory=NULL;
269 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
270 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
271 } else {
272 p=(const int32_t *)udata_getMemory(profile->sprepData);
273 }
3d1f044b 274 umtx_unlock(usprepMutex());
374ca955
A
275 /* initialize some variables */
276 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
277
729e4ab9 278 u_getUnicodeVersion(normUnicodeVersion);
374ca955
A
279 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
280 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
281 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
282 (dataVersion[2] << 8 ) + (dataVersion[3]);
283 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
284
285 if(U_FAILURE(*errorCode)){
286 udata_close(dataMemory);
287 return FALSE;
288 }
289 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
290 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
291 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
292 ){
293 *errorCode = U_INVALID_FORMAT_ERROR;
294 udata_close(dataMemory);
295 return FALSE;
296 }
297 profile->isDataLoaded = TRUE;
298
299 /* if a different thread set it first, then close the extra data */
300 if(dataMemory!=NULL) {
301 udata_close(dataMemory); /* NULL if it was set correctly */
302 }
303
304
305 return profile->isDataLoaded;
306}
307
308static UStringPrepProfile*
309usprep_getProfile(const char* path,
310 const char* name,
311 UErrorCode *status){
312
313 UStringPrepProfile* profile = NULL;
314
315 initCache(status);
316
317 if(U_FAILURE(*status)){
318 return NULL;
319 }
320
321 UStringPrepKey stackKey;
322 /*
323 * const is cast way to save malloc, strcpy and free calls
324 * we use the passed in pointers for fetching the data from the
325 * hash table which is safe
326 */
327 stackKey.name = (char*) name;
328 stackKey.path = (char*) path;
329
330 /* fetch the data from the cache */
3d1f044b 331 umtx_lock(usprepMutex());
374ca955 332 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
729e4ab9
A
333 if(profile != NULL) {
334 profile->refCount++;
335 }
3d1f044b 336 umtx_unlock(usprepMutex());
374ca955 337
729e4ab9 338 if(profile == NULL) {
374ca955 339 /* else load the data and put the data in the cache */
729e4ab9
A
340 LocalMemory<UStringPrepProfile> newProfile;
341 if(newProfile.allocateInsteadAndReset() == NULL) {
374ca955 342 *status = U_MEMORY_ALLOCATION_ERROR;
374ca955
A
343 return NULL;
344 }
345
729e4ab9
A
346 /* load the data */
347 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
374ca955
A
348 return NULL;
349 }
350
729e4ab9
A
351 /* get the options */
352 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
353 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
374ca955 354
729e4ab9
A
355 LocalMemory<UStringPrepKey> key;
356 LocalMemory<char> keyName;
357 LocalMemory<char> keyPath;
358 if( key.allocateInsteadAndReset() == NULL ||
3d1f044b 359 keyName.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name)+1)) == NULL ||
729e4ab9 360 (path != NULL &&
3d1f044b 361 keyPath.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path)+1)) == NULL)
729e4ab9
A
362 ) {
363 *status = U_MEMORY_ALLOCATION_ERROR;
364 usprep_unload(newProfile.getAlias());
374ca955
A
365 return NULL;
366 }
73c04bcf 367
3d1f044b 368 umtx_lock(usprepMutex());
729e4ab9
A
369 // If another thread already inserted the same key/value, refcount and cleanup our thread data
370 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
371 if(profile != NULL) {
372 profile->refCount++;
373 usprep_unload(newProfile.getAlias());
374 }
375 else {
376 /* initialize the key members */
377 key->name = keyName.orphan();
378 uprv_strcpy(key->name, name);
379 if(path != NULL){
380 key->path = keyPath.orphan();
381 uprv_strcpy(key->path, path);
382 }
383 profile = newProfile.orphan();
384
385 /* add the data object to the cache */
386 profile->refCount = 1;
387 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
388 }
3d1f044b 389 umtx_unlock(usprepMutex());
374ca955 390 }
374ca955
A
391
392 return profile;
393}
394
395U_CAPI UStringPrepProfile* U_EXPORT2
396usprep_open(const char* path,
397 const char* name,
398 UErrorCode* status){
399
400 if(status == NULL || U_FAILURE(*status)){
401 return NULL;
402 }
374ca955
A
403
404 /* initialize the profile struct members */
73c04bcf 405 return usprep_getProfile(path,name,status);
374ca955
A
406}
407
729e4ab9
A
408U_CAPI UStringPrepProfile* U_EXPORT2
409usprep_openByType(UStringPrepProfileType type,
410 UErrorCode* status) {
411 if(status == NULL || U_FAILURE(*status)){
412 return NULL;
413 }
414 int32_t index = (int32_t)type;
2ca993e8 415 if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
729e4ab9
A
416 *status = U_ILLEGAL_ARGUMENT_ERROR;
417 return NULL;
418 }
419 return usprep_open(NULL, PROFILE_NAMES[index], status);
420}
421
374ca955
A
422U_CAPI void U_EXPORT2
423usprep_close(UStringPrepProfile* profile){
424 if(profile==NULL){
425 return;
426 }
427
3d1f044b 428 umtx_lock(usprepMutex());
374ca955
A
429 /* decrement the ref count*/
430 if(profile->refCount > 0){
431 profile->refCount--;
432 }
3d1f044b 433 umtx_unlock(usprepMutex());
374ca955
A
434
435}
436
437U_CFUNC void
438uprv_syntaxError(const UChar* rules,
439 int32_t pos,
440 int32_t rulesLen,
441 UParseError* parseError){
442 if(parseError == NULL){
443 return;
444 }
445 parseError->offset = pos;
446 parseError->line = 0 ; // we are not using line numbers
447
448 // for pre-context
46f4442e 449 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
374ca955
A
450 int32_t limit = pos;
451
452 u_memcpy(parseError->preContext,rules+start,limit-start);
453 //null terminate the buffer
454 parseError->preContext[limit-start] = 0;
455
456 // for post-context; include error rules[pos]
457 start = pos;
458 limit = start + (U_PARSE_CONTEXT_LEN-1);
459 if (limit > rulesLen) {
460 limit = rulesLen;
461 }
462 if (start < rulesLen) {
463 u_memcpy(parseError->postContext,rules+start,limit-start);
464 }
465 //null terminate the buffer
466 parseError->postContext[limit-start]= 0;
467}
468
469
470static inline UStringPrepType
471getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
472
473 UStringPrepType type;
474 if(trieWord == 0){
475 /*
476 * Initial value stored in the mapping table
477 * just return USPREP_TYPE_LIMIT .. so that
478 * the source codepoint is copied to the destination
479 */
480 type = USPREP_TYPE_LIMIT;
73c04bcf
A
481 isIndex =FALSE;
482 value = 0;
374ca955
A
483 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
484 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
73c04bcf
A
485 isIndex =FALSE;
486 value = 0;
374ca955
A
487 }else{
488 /* get the type */
489 type = USPREP_MAP;
490 /* ascertain if the value is index or delta */
491 if(trieWord & 0x02){
492 isIndex = TRUE;
493 value = trieWord >> 2; //mask off the lower 2 bits and shift
374ca955
A
494 }else{
495 isIndex = FALSE;
496 value = (int16_t)trieWord;
497 value = (value >> 2);
374ca955
A
498 }
499
500 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
501 type = USPREP_DELETE;
502 isIndex =FALSE;
503 value = 0;
504 }
505 }
506 return type;
507}
508
b331163b 509// TODO: change to writing to UnicodeString not UChar *
374ca955
A
510static int32_t
511usprep_map( const UStringPrepProfile* profile,
512 const UChar* src, int32_t srcLength,
513 UChar* dest, int32_t destCapacity,
514 int32_t options,
515 UParseError* parseError,
516 UErrorCode* status ){
517
518 uint16_t result;
519 int32_t destIndex=0;
520 int32_t srcIndex;
521 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
522 UStringPrepType type;
523 int16_t value;
524 UBool isIndex;
525 const int32_t* indexes = profile->indexes;
526
527 // no error checking the caller check for error and arguments
528 // no string length check the caller finds out the string length
529
530 for(srcIndex=0;srcIndex<srcLength;){
531 UChar32 ch;
532
533 U16_NEXT(src,srcIndex,srcLength,ch);
534
535 result=0;
536
537 UTRIE_GET16(&profile->sprepTrie,ch,result);
538
539 type = getValues(result, value, isIndex);
540
541 // check if the source codepoint is unassigned
542 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
543
544 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
545 *status = U_STRINGPREP_UNASSIGNED_ERROR;
546 return 0;
547
548 }else if(type == USPREP_MAP){
549
550 int32_t index, length;
551
552 if(isIndex){
553 index = value;
554 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
555 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
556 length = 1;
557 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
558 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
559 length = 2;
560 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
561 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
562 length = 3;
563 }else{
564 length = profile->mappingData[index++];
565
566 }
567
568 /* copy mapping to destination */
569 for(int32_t i=0; i< length; i++){
570 if(destIndex < destCapacity ){
571 dest[destIndex] = profile->mappingData[index+i];
572 }
573 destIndex++; /* for pre-flighting */
574 }
575 continue;
576 }else{
577 // subtract the delta to arrive at the code point
578 ch -= value;
579 }
580
581 }else if(type==USPREP_DELETE){
582 // just consume the codepoint and contine
583 continue;
584 }
585 //copy the code point into destination
586 if(ch <= 0xFFFF){
587 if(destIndex < destCapacity ){
588 dest[destIndex] = (UChar)ch;
589 }
590 destIndex++;
591 }else{
592 if(destIndex+1 < destCapacity ){
593 dest[destIndex] = U16_LEAD(ch);
594 dest[destIndex+1] = U16_TRAIL(ch);
595 }
596 destIndex +=2;
597 }
598
599 }
600
601 return u_terminateUChars(dest, destCapacity, destIndex, status);
602}
603
/*
   1) Map -- For each character in the input, check if it has a mapping
      and, if so, replace it with its mapping.

   2) Normalize -- Possibly normalize the result of step 1 using Unicode
      normalization.

   3) Prohibit -- Check for any characters that are not allowed in the
      output.  If any are found, return an error.

   4) Check bidi -- Possibly check for right-to-left characters, and if
      any are found, make sure that the whole string satisfies the
      requirements for bidirectional strings.  If the string does not
      satisfy the requirements for bidirectional strings, return an
      error.

      [Unicode3.2] defines several bidirectional categories; each character
      has one bidirectional category assigned to it.  For the purposes of
      the requirements below, an "RandALCat character" is a character that
      has Unicode bidirectional categories "R" or "AL"; an "LCat character"
      is a character that has Unicode bidirectional category "L".  Note
      that there are many characters which fall in neither of the above
      definitions; Latin digits (<U+0030> through <U+0039>) are examples of
      this because they have bidirectional category "EN".

      In any profile that specifies bidirectional character handling, all
      three of the following requirements MUST be met:

      1) The characters in section 5.8 MUST be prohibited.

      2) If a string contains any RandALCat character, the string MUST NOT
         contain any LCat character.

      3) If a string contains any RandALCat character, a RandALCat
         character MUST be the first character of the string, and a
         RandALCat character MUST be the last character of the string.
*/
374ca955
A
642U_CAPI int32_t U_EXPORT2
643usprep_prepare( const UStringPrepProfile* profile,
644 const UChar* src, int32_t srcLength,
645 UChar* dest, int32_t destCapacity,
646 int32_t options,
647 UParseError* parseError,
648 UErrorCode* status ){
649
650 // check error status
b331163b 651 if(U_FAILURE(*status)){
374ca955
A
652 return 0;
653 }
b331163b 654
374ca955 655 //check arguments
b331163b
A
656 if(profile==NULL ||
657 (src==NULL ? srcLength!=0 : srcLength<-1) ||
658 (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
374ca955
A
659 *status=U_ILLEGAL_ARGUMENT_ERROR;
660 return 0;
661 }
662
374ca955 663 //get the string length
b331163b 664 if(srcLength < 0){
374ca955
A
665 srcLength = u_strlen(src);
666 }
667 // map
b331163b
A
668 UnicodeString s1;
669 UChar *b1 = s1.getBuffer(srcLength);
670 if(b1==NULL){
671 *status = U_MEMORY_ALLOCATION_ERROR;
672 return 0;
673 }
674 int32_t b1Len = usprep_map(profile, src, srcLength,
675 b1, s1.getCapacity(), options, parseError, status);
676 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
374ca955
A
677
678 if(*status == U_BUFFER_OVERFLOW_ERROR){
679 // redo processing of string
680 /* we do not have enough room so grow the buffer*/
b331163b 681 b1 = s1.getBuffer(b1Len);
374ca955
A
682 if(b1==NULL){
683 *status = U_MEMORY_ALLOCATION_ERROR;
b331163b 684 return 0;
374ca955
A
685 }
686
687 *status = U_ZERO_ERROR; // reset error
b331163b
A
688 b1Len = usprep_map(profile, src, srcLength,
689 b1, s1.getCapacity(), options, parseError, status);
690 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
691 }
692 if(U_FAILURE(*status)){
693 return 0;
374ca955
A
694 }
695
696 // normalize
b331163b
A
697 UnicodeString s2;
698 if(profile->doNFKC){
699 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
700 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
701 if(U_FAILURE(*status)){
702 return 0;
374ca955 703 }
b331163b 704 fn2.normalize(s1, s2, *status);
374ca955 705 }else{
b331163b 706 s2.fastCopyFrom(s1);
374ca955 707 }
374ca955 708 if(U_FAILURE(*status)){
b331163b 709 return 0;
374ca955
A
710 }
711
374ca955 712 // Prohibit and checkBiDi in one pass
b331163b
A
713 const UChar *b2 = s2.getBuffer();
714 int32_t b2Len = s2.length();
715 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
716 UBool leftToRight=FALSE, rightToLeft=FALSE;
717 int32_t rtlPos =-1, ltrPos =-1;
374ca955 718
b331163b
A
719 for(int32_t b2Index=0; b2Index<b2Len;){
720 UChar32 ch = 0;
374ca955
A
721 U16_NEXT(b2, b2Index, b2Len, ch);
722
b331163b 723 uint16_t result;
374ca955 724 UTRIE_GET16(&profile->sprepTrie,ch,result);
b331163b
A
725
726 int16_t value;
727 UBool isIndex;
728 UStringPrepType type = getValues(result, value, isIndex);
374ca955
A
729
730 if( type == USPREP_PROHIBITED ||
731 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
732 ){
733 *status = U_STRINGPREP_PROHIBITED_ERROR;
3d1f044b 734 uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError);
b331163b 735 return 0;
374ca955
A
736 }
737
73c04bcf 738 if(profile->checkBiDi) {
0f5d89e8 739 direction = ubidi_getClass(ch);
73c04bcf
A
740 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
741 firstCharDir = direction;
742 }
743 if(direction == U_LEFT_TO_RIGHT){
744 leftToRight = TRUE;
745 ltrPos = b2Index-1;
746 }
747 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
748 rightToLeft = TRUE;
749 rtlPos = b2Index-1;
750 }
374ca955 751 }
b331163b 752 }
374ca955
A
753 if(profile->checkBiDi == TRUE){
754 // satisfy 2
755 if( leftToRight == TRUE && rightToLeft == TRUE){
756 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
757 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
b331163b 758 return 0;
374ca955
A
759 }
760
761 //satisfy 3
762 if( rightToLeft == TRUE &&
763 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
764 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
765 ){
766 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
767 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
768 return FALSE;
769 }
770 }
b331163b 771 return s2.extract(dest, destCapacity, *status);
374ca955
A
772}
773
774
775/* data swapping ------------------------------------------------------------ */
776
777U_CAPI int32_t U_EXPORT2
778usprep_swap(const UDataSwapper *ds,
779 const void *inData, int32_t length, void *outData,
780 UErrorCode *pErrorCode) {
781 const UDataInfo *pInfo;
782 int32_t headerSize;
783
784 const uint8_t *inBytes;
785 uint8_t *outBytes;
786
787 const int32_t *inIndexes;
788 int32_t indexes[16];
789
790 int32_t i, offset, count, size;
791
792 /* udata_swapDataHeader checks the arguments */
793 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
794 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
795 return 0;
796 }
797
798 /* check data format and format version */
799 pInfo=(const UDataInfo *)((const char *)inData+4);
800 if(!(
801 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
802 pInfo->dataFormat[1]==0x50 &&
803 pInfo->dataFormat[2]==0x52 &&
804 pInfo->dataFormat[3]==0x50 &&
805 pInfo->formatVersion[0]==3
806 )) {
807 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
808 pInfo->dataFormat[0], pInfo->dataFormat[1],
809 pInfo->dataFormat[2], pInfo->dataFormat[3],
810 pInfo->formatVersion[0]);
811 *pErrorCode=U_UNSUPPORTED_ERROR;
812 return 0;
813 }
814
815 inBytes=(const uint8_t *)inData+headerSize;
816 outBytes=(uint8_t *)outData+headerSize;
817
818 inIndexes=(const int32_t *)inBytes;
819
820 if(length>=0) {
821 length-=headerSize;
822 if(length<16*4) {
823 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
824 length);
825 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
826 return 0;
827 }
828 }
829
830 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
831 for(i=0; i<16; ++i) {
832 indexes[i]=udata_readInt32(ds, inIndexes[i]);
833 }
834
835 /* calculate the total length of the data */
836 size=
837 16*4+ /* size of indexes[] */
838 indexes[_SPREP_INDEX_TRIE_SIZE]+
839 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
840
841 if(length>=0) {
842 if(length<size) {
843 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
844 length);
845 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
846 return 0;
847 }
848
849 /* copy the data for inaccessible bytes */
850 if(inBytes!=outBytes) {
851 uprv_memcpy(outBytes, inBytes, size);
852 }
853
854 offset=0;
855
856 /* swap the int32_t indexes[] */
857 count=16*4;
858 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
859 offset+=count;
860
861 /* swap the UTrie */
862 count=indexes[_SPREP_INDEX_TRIE_SIZE];
863 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
864 offset+=count;
865
866 /* swap the uint16_t mappingTable[] */
867 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
868 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
b331163b 869 //offset+=count;
374ca955
A
870 }
871
872 return headerSize+size;
873}
874
875#endif /* #if !UCONFIG_NO_IDNA */