]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/usprep.cpp
ICU-64232.0.1.tar.gz
[apple/icu.git] / icuSources / common / usprep.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: usprep.cpp
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003jul2
16 * created by: Ram Viswanadha
17 */
18
19 #include "unicode/utypes.h"
20
21 #if !UCONFIG_NO_IDNA
22
23 #include "unicode/usprep.h"
24
25 #include "unicode/normalizer2.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "unicode/uversion.h"
29 #include "umutex.h"
30 #include "cmemory.h"
31 #include "sprpimpl.h"
32 #include "ustr_imp.h"
33 #include "uhash.h"
34 #include "cstring.h"
35 #include "udataswp.h"
36 #include "ucln_cmn.h"
37 #include "ubidi_props.h"
38 #include "uprops.h"
39
40 U_NAMESPACE_USE
41
42 U_CDECL_BEGIN
43
44 /*
45 Static cache for already opened StringPrep profiles
46 */
47 static UHashtable *SHARED_DATA_HASHTABLE = NULL;
48 static icu::UInitOnce gSharedDataInitOnce = U_INITONCE_INITIALIZER;
49
50 static UMutex *usprepMutex() {
51 static UMutex *m = STATIC_NEW(UMutex);
52 return m;
53 }
54
55 /* format version of spp file */
56 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
57
58 /* the Unicode version of the sprep data */
59 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
60
61 /* Profile names must be aligned to UStringPrepProfileType */
62 static const char * const PROFILE_NAMES[] = {
63 "rfc3491", /* USPREP_RFC3491_NAMEPREP */
64 "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */
65 "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */
66 "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */
67 "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
68 "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
69 "rfc3722", /* USPREP_RFC3722_ISCSI */
70 "rfc3920node", /* USPREP_RFC3920_NODEPREP */
71 "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */
72 "rfc4011", /* USPREP_RFC4011_MIB */
73 "rfc4013", /* USPREP_RFC4013_SASLPREP */
74 "rfc4505", /* USPREP_RFC4505_TRACE */
75 "rfc4518", /* USPREP_RFC4518_LDAP */
76 "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */
77 };
78
79 static UBool U_CALLCONV
80 isSPrepAcceptable(void * /* context */,
81 const char * /* type */,
82 const char * /* name */,
83 const UDataInfo *pInfo) {
84 if(
85 pInfo->size>=20 &&
86 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
87 pInfo->charsetFamily==U_CHARSET_FAMILY &&
88 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
89 pInfo->dataFormat[1]==0x50 &&
90 pInfo->dataFormat[2]==0x52 &&
91 pInfo->dataFormat[3]==0x50 &&
92 pInfo->formatVersion[0]==3 &&
93 pInfo->formatVersion[2]==UTRIE_SHIFT &&
94 pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
95 ) {
96 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
97 uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
98 return TRUE;
99 } else {
100 return FALSE;
101 }
102 }
103
104 static int32_t U_CALLCONV
105 getSPrepFoldingOffset(uint32_t data) {
106
107 return (int32_t)data;
108
109 }
110
111 /* hashes an entry */
112 static int32_t U_CALLCONV
113 hashEntry(const UHashTok parm) {
114 UStringPrepKey *b = (UStringPrepKey *)parm.pointer;
115 UHashTok namekey, pathkey;
116 namekey.pointer = b->name;
117 pathkey.pointer = b->path;
118 uint32_t unsignedHash = static_cast<uint32_t>(uhash_hashChars(namekey)) +
119 37u * static_cast<uint32_t>(uhash_hashChars(pathkey));
120 return static_cast<int32_t>(unsignedHash);
121 }
122
123 /* compares two entries */
124 static UBool U_CALLCONV
125 compareEntries(const UHashTok p1, const UHashTok p2) {
126 UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer;
127 UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer;
128 UHashTok name1, name2, path1, path2;
129 name1.pointer = b1->name;
130 name2.pointer = b2->name;
131 path1.pointer = b1->path;
132 path2.pointer = b2->path;
133 return ((UBool)(uhash_compareChars(name1, name2) &
134 uhash_compareChars(path1, path2)));
135 }
136
137 static void
138 usprep_unload(UStringPrepProfile* data){
139 udata_close(data->sprepData);
140 }
141
142 static int32_t
143 usprep_internal_flushCache(UBool noRefCount){
144 UStringPrepProfile *profile = NULL;
145 UStringPrepKey *key = NULL;
146 int32_t pos = UHASH_FIRST;
147 int32_t deletedNum = 0;
148 const UHashElement *e;
149
150 /*
151 * if shared data hasn't even been lazy evaluated yet
152 * return 0
153 */
154 umtx_lock(usprepMutex());
155 if (SHARED_DATA_HASHTABLE == NULL) {
156 umtx_unlock(usprepMutex());
157 return 0;
158 }
159
160 /*creates an enumeration to iterate through every element in the table */
161 while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL)
162 {
163 profile = (UStringPrepProfile *) e->value.pointer;
164 key = (UStringPrepKey *) e->key.pointer;
165
166 if ((noRefCount== FALSE && profile->refCount == 0) ||
167 noRefCount== TRUE) {
168 deletedNum++;
169 uhash_removeElement(SHARED_DATA_HASHTABLE, e);
170
171 /* unload the data */
172 usprep_unload(profile);
173
174 if(key->name != NULL) {
175 uprv_free(key->name);
176 key->name=NULL;
177 }
178 if(key->path != NULL) {
179 uprv_free(key->path);
180 key->path=NULL;
181 }
182 uprv_free(profile);
183 uprv_free(key);
184 }
185
186 }
187 umtx_unlock(usprepMutex());
188
189 return deletedNum;
190 }
191
192 /* Works just like ucnv_flushCache()
193 static int32_t
194 usprep_flushCache(){
195 return usprep_internal_flushCache(FALSE);
196 }
197 */
198
199 static UBool U_CALLCONV usprep_cleanup(void){
200 if (SHARED_DATA_HASHTABLE != NULL) {
201 usprep_internal_flushCache(TRUE);
202 if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
203 uhash_close(SHARED_DATA_HASHTABLE);
204 SHARED_DATA_HASHTABLE = NULL;
205 }
206 }
207 gSharedDataInitOnce.reset();
208 return (SHARED_DATA_HASHTABLE == NULL);
209 }
210 U_CDECL_END
211
212
213 /** Initializes the cache for resources */
214 static void U_CALLCONV
215 createCache(UErrorCode &status) {
216 SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status);
217 if (U_FAILURE(status)) {
218 SHARED_DATA_HASHTABLE = NULL;
219 }
220 ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
221 }
222
223 static void
224 initCache(UErrorCode *status) {
225 umtx_initOnce(gSharedDataInitOnce, &createCache, *status);
226 }
227
228 static UBool U_CALLCONV
229 loadData(UStringPrepProfile* profile,
230 const char* path,
231 const char* name,
232 const char* type,
233 UErrorCode* errorCode) {
234 /* load Unicode SPREP data from file */
235 UTrie _sprepTrie={ 0,0,0,0,0,0,0 };
236 UDataMemory *dataMemory;
237 const int32_t *p=NULL;
238 const uint8_t *pb;
239 UVersionInfo normUnicodeVersion;
240 int32_t normUniVer, sprepUniVer, normCorrVer;
241
242 if(errorCode==NULL || U_FAILURE(*errorCode)) {
243 return 0;
244 }
245
246 /* open the data outside the mutex block */
247 //TODO: change the path
248 dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
249 if(U_FAILURE(*errorCode)) {
250 return FALSE;
251 }
252
253 p=(const int32_t *)udata_getMemory(dataMemory);
254 pb=(const uint8_t *)(p+_SPREP_INDEX_TOP);
255 utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
256 _sprepTrie.getFoldingOffset=getSPrepFoldingOffset;
257
258
259 if(U_FAILURE(*errorCode)) {
260 udata_close(dataMemory);
261 return FALSE;
262 }
263
264 /* in the mutex block, set the data for this process */
265 umtx_lock(usprepMutex());
266 if(profile->sprepData==NULL) {
267 profile->sprepData=dataMemory;
268 dataMemory=NULL;
269 uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
270 uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
271 } else {
272 p=(const int32_t *)udata_getMemory(profile->sprepData);
273 }
274 umtx_unlock(usprepMutex());
275 /* initialize some variables */
276 profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
277
278 u_getUnicodeVersion(normUnicodeVersion);
279 normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
280 (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
281 sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
282 (dataVersion[2] << 8 ) + (dataVersion[3]);
283 normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];
284
285 if(U_FAILURE(*errorCode)){
286 udata_close(dataMemory);
287 return FALSE;
288 }
289 if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
290 normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
291 ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/
292 ){
293 *errorCode = U_INVALID_FORMAT_ERROR;
294 udata_close(dataMemory);
295 return FALSE;
296 }
297 profile->isDataLoaded = TRUE;
298
299 /* if a different thread set it first, then close the extra data */
300 if(dataMemory!=NULL) {
301 udata_close(dataMemory); /* NULL if it was set correctly */
302 }
303
304
305 return profile->isDataLoaded;
306 }
307
308 static UStringPrepProfile*
309 usprep_getProfile(const char* path,
310 const char* name,
311 UErrorCode *status){
312
313 UStringPrepProfile* profile = NULL;
314
315 initCache(status);
316
317 if(U_FAILURE(*status)){
318 return NULL;
319 }
320
321 UStringPrepKey stackKey;
322 /*
323 * const is cast way to save malloc, strcpy and free calls
324 * we use the passed in pointers for fetching the data from the
325 * hash table which is safe
326 */
327 stackKey.name = (char*) name;
328 stackKey.path = (char*) path;
329
330 /* fetch the data from the cache */
331 umtx_lock(usprepMutex());
332 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
333 if(profile != NULL) {
334 profile->refCount++;
335 }
336 umtx_unlock(usprepMutex());
337
338 if(profile == NULL) {
339 /* else load the data and put the data in the cache */
340 LocalMemory<UStringPrepProfile> newProfile;
341 if(newProfile.allocateInsteadAndReset() == NULL) {
342 *status = U_MEMORY_ALLOCATION_ERROR;
343 return NULL;
344 }
345
346 /* load the data */
347 if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
348 return NULL;
349 }
350
351 /* get the options */
352 newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
353 newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
354
355 LocalMemory<UStringPrepKey> key;
356 LocalMemory<char> keyName;
357 LocalMemory<char> keyPath;
358 if( key.allocateInsteadAndReset() == NULL ||
359 keyName.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name)+1)) == NULL ||
360 (path != NULL &&
361 keyPath.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path)+1)) == NULL)
362 ) {
363 *status = U_MEMORY_ALLOCATION_ERROR;
364 usprep_unload(newProfile.getAlias());
365 return NULL;
366 }
367
368 umtx_lock(usprepMutex());
369 // If another thread already inserted the same key/value, refcount and cleanup our thread data
370 profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
371 if(profile != NULL) {
372 profile->refCount++;
373 usprep_unload(newProfile.getAlias());
374 }
375 else {
376 /* initialize the key members */
377 key->name = keyName.orphan();
378 uprv_strcpy(key->name, name);
379 if(path != NULL){
380 key->path = keyPath.orphan();
381 uprv_strcpy(key->path, path);
382 }
383 profile = newProfile.orphan();
384
385 /* add the data object to the cache */
386 profile->refCount = 1;
387 uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
388 }
389 umtx_unlock(usprepMutex());
390 }
391
392 return profile;
393 }
394
395 U_CAPI UStringPrepProfile* U_EXPORT2
396 usprep_open(const char* path,
397 const char* name,
398 UErrorCode* status){
399
400 if(status == NULL || U_FAILURE(*status)){
401 return NULL;
402 }
403
404 /* initialize the profile struct members */
405 return usprep_getProfile(path,name,status);
406 }
407
408 U_CAPI UStringPrepProfile* U_EXPORT2
409 usprep_openByType(UStringPrepProfileType type,
410 UErrorCode* status) {
411 if(status == NULL || U_FAILURE(*status)){
412 return NULL;
413 }
414 int32_t index = (int32_t)type;
415 if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) {
416 *status = U_ILLEGAL_ARGUMENT_ERROR;
417 return NULL;
418 }
419 return usprep_open(NULL, PROFILE_NAMES[index], status);
420 }
421
422 U_CAPI void U_EXPORT2
423 usprep_close(UStringPrepProfile* profile){
424 if(profile==NULL){
425 return;
426 }
427
428 umtx_lock(usprepMutex());
429 /* decrement the ref count*/
430 if(profile->refCount > 0){
431 profile->refCount--;
432 }
433 umtx_unlock(usprepMutex());
434
435 }
436
437 U_CFUNC void
438 uprv_syntaxError(const UChar* rules,
439 int32_t pos,
440 int32_t rulesLen,
441 UParseError* parseError){
442 if(parseError == NULL){
443 return;
444 }
445 parseError->offset = pos;
446 parseError->line = 0 ; // we are not using line numbers
447
448 // for pre-context
449 int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
450 int32_t limit = pos;
451
452 u_memcpy(parseError->preContext,rules+start,limit-start);
453 //null terminate the buffer
454 parseError->preContext[limit-start] = 0;
455
456 // for post-context; include error rules[pos]
457 start = pos;
458 limit = start + (U_PARSE_CONTEXT_LEN-1);
459 if (limit > rulesLen) {
460 limit = rulesLen;
461 }
462 if (start < rulesLen) {
463 u_memcpy(parseError->postContext,rules+start,limit-start);
464 }
465 //null terminate the buffer
466 parseError->postContext[limit-start]= 0;
467 }
468
469
470 static inline UStringPrepType
471 getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){
472
473 UStringPrepType type;
474 if(trieWord == 0){
475 /*
476 * Initial value stored in the mapping table
477 * just return USPREP_TYPE_LIMIT .. so that
478 * the source codepoint is copied to the destination
479 */
480 type = USPREP_TYPE_LIMIT;
481 isIndex =FALSE;
482 value = 0;
483 }else if(trieWord >= _SPREP_TYPE_THRESHOLD){
484 type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD);
485 isIndex =FALSE;
486 value = 0;
487 }else{
488 /* get the type */
489 type = USPREP_MAP;
490 /* ascertain if the value is index or delta */
491 if(trieWord & 0x02){
492 isIndex = TRUE;
493 value = trieWord >> 2; //mask off the lower 2 bits and shift
494 }else{
495 isIndex = FALSE;
496 value = (int16_t)trieWord;
497 value = (value >> 2);
498 }
499
500 if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){
501 type = USPREP_DELETE;
502 isIndex =FALSE;
503 value = 0;
504 }
505 }
506 return type;
507 }
508
509 // TODO: change to writing to UnicodeString not UChar *
510 static int32_t
511 usprep_map( const UStringPrepProfile* profile,
512 const UChar* src, int32_t srcLength,
513 UChar* dest, int32_t destCapacity,
514 int32_t options,
515 UParseError* parseError,
516 UErrorCode* status ){
517
518 uint16_t result;
519 int32_t destIndex=0;
520 int32_t srcIndex;
521 UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0);
522 UStringPrepType type;
523 int16_t value;
524 UBool isIndex;
525 const int32_t* indexes = profile->indexes;
526
527 // no error checking the caller check for error and arguments
528 // no string length check the caller finds out the string length
529
530 for(srcIndex=0;srcIndex<srcLength;){
531 UChar32 ch;
532
533 U16_NEXT(src,srcIndex,srcLength,ch);
534
535 result=0;
536
537 UTRIE_GET16(&profile->sprepTrie,ch,result);
538
539 type = getValues(result, value, isIndex);
540
541 // check if the source codepoint is unassigned
542 if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){
543
544 uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError);
545 *status = U_STRINGPREP_UNASSIGNED_ERROR;
546 return 0;
547
548 }else if(type == USPREP_MAP){
549
550 int32_t index, length;
551
552 if(isIndex){
553 index = value;
554 if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
555 index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
556 length = 1;
557 }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
558 index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
559 length = 2;
560 }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
561 index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
562 length = 3;
563 }else{
564 length = profile->mappingData[index++];
565
566 }
567
568 /* copy mapping to destination */
569 for(int32_t i=0; i< length; i++){
570 if(destIndex < destCapacity ){
571 dest[destIndex] = profile->mappingData[index+i];
572 }
573 destIndex++; /* for pre-flighting */
574 }
575 continue;
576 }else{
577 // subtract the delta to arrive at the code point
578 ch -= value;
579 }
580
581 }else if(type==USPREP_DELETE){
582 // just consume the codepoint and contine
583 continue;
584 }
585 //copy the code point into destination
586 if(ch <= 0xFFFF){
587 if(destIndex < destCapacity ){
588 dest[destIndex] = (UChar)ch;
589 }
590 destIndex++;
591 }else{
592 if(destIndex+1 < destCapacity ){
593 dest[destIndex] = U16_LEAD(ch);
594 dest[destIndex+1] = U16_TRAIL(ch);
595 }
596 destIndex +=2;
597 }
598
599 }
600
601 return u_terminateUChars(dest, destCapacity, destIndex, status);
602 }
603
604 /*
605 1) Map -- For each character in the input, check if it has a mapping
606 and, if so, replace it with its mapping.
607
608 2) Normalize -- Possibly normalize the result of step 1 using Unicode
609 normalization.
610
611 3) Prohibit -- Check for any characters that are not allowed in the
612 output. If any are found, return an error.
613
614 4) Check bidi -- Possibly check for right-to-left characters, and if
615 any are found, make sure that the whole string satisfies the
616 requirements for bidirectional strings. If the string does not
617 satisfy the requirements for bidirectional strings, return an
618 error.
619 [Unicode3.2] defines several bidirectional categories; each character
620 has one bidirectional category assigned to it. For the purposes of
621 the requirements below, an "RandALCat character" is a character that
622 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
is a character that has Unicode bidirectional category "L". Note
that there are many characters which fall in neither of the above
627 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
628 this because they have bidirectional category "EN".
629
630 In any profile that specifies bidirectional character handling, all
631 three of the following requirements MUST be met:
632
633 1) The characters in section 5.8 MUST be prohibited.
634
635 2) If a string contains any RandALCat character, the string MUST NOT
636 contain any LCat character.
637
638 3) If a string contains any RandALCat character, a RandALCat
639 character MUST be the first character of the string, and a
640 RandALCat character MUST be the last character of the string.
641 */
642 U_CAPI int32_t U_EXPORT2
643 usprep_prepare( const UStringPrepProfile* profile,
644 const UChar* src, int32_t srcLength,
645 UChar* dest, int32_t destCapacity,
646 int32_t options,
647 UParseError* parseError,
648 UErrorCode* status ){
649
650 // check error status
651 if(U_FAILURE(*status)){
652 return 0;
653 }
654
655 //check arguments
656 if(profile==NULL ||
657 (src==NULL ? srcLength!=0 : srcLength<-1) ||
658 (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
659 *status=U_ILLEGAL_ARGUMENT_ERROR;
660 return 0;
661 }
662
663 //get the string length
664 if(srcLength < 0){
665 srcLength = u_strlen(src);
666 }
667 // map
668 UnicodeString s1;
669 UChar *b1 = s1.getBuffer(srcLength);
670 if(b1==NULL){
671 *status = U_MEMORY_ALLOCATION_ERROR;
672 return 0;
673 }
674 int32_t b1Len = usprep_map(profile, src, srcLength,
675 b1, s1.getCapacity(), options, parseError, status);
676 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
677
678 if(*status == U_BUFFER_OVERFLOW_ERROR){
679 // redo processing of string
680 /* we do not have enough room so grow the buffer*/
681 b1 = s1.getBuffer(b1Len);
682 if(b1==NULL){
683 *status = U_MEMORY_ALLOCATION_ERROR;
684 return 0;
685 }
686
687 *status = U_ZERO_ERROR; // reset error
688 b1Len = usprep_map(profile, src, srcLength,
689 b1, s1.getCapacity(), options, parseError, status);
690 s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
691 }
692 if(U_FAILURE(*status)){
693 return 0;
694 }
695
696 // normalize
697 UnicodeString s2;
698 if(profile->doNFKC){
699 const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
700 FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
701 if(U_FAILURE(*status)){
702 return 0;
703 }
704 fn2.normalize(s1, s2, *status);
705 }else{
706 s2.fastCopyFrom(s1);
707 }
708 if(U_FAILURE(*status)){
709 return 0;
710 }
711
712 // Prohibit and checkBiDi in one pass
713 const UChar *b2 = s2.getBuffer();
714 int32_t b2Len = s2.length();
715 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
716 UBool leftToRight=FALSE, rightToLeft=FALSE;
717 int32_t rtlPos =-1, ltrPos =-1;
718
719 for(int32_t b2Index=0; b2Index<b2Len;){
720 UChar32 ch = 0;
721 U16_NEXT(b2, b2Index, b2Len, ch);
722
723 uint16_t result;
724 UTRIE_GET16(&profile->sprepTrie,ch,result);
725
726 int16_t value;
727 UBool isIndex;
728 UStringPrepType type = getValues(result, value, isIndex);
729
730 if( type == USPREP_PROHIBITED ||
731 ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
732 ){
733 *status = U_STRINGPREP_PROHIBITED_ERROR;
734 uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError);
735 return 0;
736 }
737
738 if(profile->checkBiDi) {
739 direction = ubidi_getClass(ch);
740 if(firstCharDir == U_CHAR_DIRECTION_COUNT){
741 firstCharDir = direction;
742 }
743 if(direction == U_LEFT_TO_RIGHT){
744 leftToRight = TRUE;
745 ltrPos = b2Index-1;
746 }
747 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
748 rightToLeft = TRUE;
749 rtlPos = b2Index-1;
750 }
751 }
752 }
753 if(profile->checkBiDi == TRUE){
754 // satisfy 2
755 if( leftToRight == TRUE && rightToLeft == TRUE){
756 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
757 uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
758 return 0;
759 }
760
761 //satisfy 3
762 if( rightToLeft == TRUE &&
763 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
764 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
765 ){
766 *status = U_STRINGPREP_CHECK_BIDI_ERROR;
767 uprv_syntaxError(b2, rtlPos, b2Len, parseError);
768 return FALSE;
769 }
770 }
771 return s2.extract(dest, destCapacity, *status);
772 }
773
774
775 /* data swapping ------------------------------------------------------------ */
776
777 U_CAPI int32_t U_EXPORT2
778 usprep_swap(const UDataSwapper *ds,
779 const void *inData, int32_t length, void *outData,
780 UErrorCode *pErrorCode) {
781 const UDataInfo *pInfo;
782 int32_t headerSize;
783
784 const uint8_t *inBytes;
785 uint8_t *outBytes;
786
787 const int32_t *inIndexes;
788 int32_t indexes[16];
789
790 int32_t i, offset, count, size;
791
792 /* udata_swapDataHeader checks the arguments */
793 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
794 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
795 return 0;
796 }
797
798 /* check data format and format version */
799 pInfo=(const UDataInfo *)((const char *)inData+4);
800 if(!(
801 pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */
802 pInfo->dataFormat[1]==0x50 &&
803 pInfo->dataFormat[2]==0x52 &&
804 pInfo->dataFormat[3]==0x50 &&
805 pInfo->formatVersion[0]==3
806 )) {
807 udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
808 pInfo->dataFormat[0], pInfo->dataFormat[1],
809 pInfo->dataFormat[2], pInfo->dataFormat[3],
810 pInfo->formatVersion[0]);
811 *pErrorCode=U_UNSUPPORTED_ERROR;
812 return 0;
813 }
814
815 inBytes=(const uint8_t *)inData+headerSize;
816 outBytes=(uint8_t *)outData+headerSize;
817
818 inIndexes=(const int32_t *)inBytes;
819
820 if(length>=0) {
821 length-=headerSize;
822 if(length<16*4) {
823 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
824 length);
825 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
826 return 0;
827 }
828 }
829
830 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
831 for(i=0; i<16; ++i) {
832 indexes[i]=udata_readInt32(ds, inIndexes[i]);
833 }
834
835 /* calculate the total length of the data */
836 size=
837 16*4+ /* size of indexes[] */
838 indexes[_SPREP_INDEX_TRIE_SIZE]+
839 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
840
841 if(length>=0) {
842 if(length<size) {
843 udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
844 length);
845 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
846 return 0;
847 }
848
849 /* copy the data for inaccessible bytes */
850 if(inBytes!=outBytes) {
851 uprv_memcpy(outBytes, inBytes, size);
852 }
853
854 offset=0;
855
856 /* swap the int32_t indexes[] */
857 count=16*4;
858 ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
859 offset+=count;
860
861 /* swap the UTrie */
862 count=indexes[_SPREP_INDEX_TRIE_SIZE];
863 utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
864 offset+=count;
865
866 /* swap the uint16_t mappingTable[] */
867 count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
868 ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
869 //offset+=count;
870 }
871
872 return headerSize+size;
873 }
874
875 #endif /* #if !UCONFIG_NO_IDNA */