// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 *
 *   Copyright (C) 2003-2016, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  usprep.cpp
 *   tab size:   8 (not used)
 *
 *   created on: 2003jul2
 *   created by: Ram Viswanadha
 */
19 #include "unicode/utypes.h"
23 #include "unicode/usprep.h"
25 #include "unicode/normalizer2.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "unicode/uversion.h"
37 #include "ubidi_props.h"
45 Static cache for already opened StringPrep profiles
47 static UHashtable
*SHARED_DATA_HASHTABLE
= NULL
;
48 static icu::UInitOnce gSharedDataInitOnce
= U_INITONCE_INITIALIZER
;
50 static UMutex
*usprepMutex() {
51 static UMutex
*m
= STATIC_NEW(UMutex
);
55 /* format version of spp file */
56 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
58 /* the Unicode version of the sprep data */
59 static UVersionInfo dataVersion
={ 0, 0, 0, 0 };
/*
 * Profile names must be aligned to UStringPrepProfileType:
 * usprep_openByType() uses the enum value directly as an index into this
 * table, so the order of the entries must not change.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NSF4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
79 static UBool U_CALLCONV
80 isSPrepAcceptable(void * /* context */,
81 const char * /* type */,
82 const char * /* name */,
83 const UDataInfo
*pInfo
) {
86 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
87 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
88 pInfo
->dataFormat
[0]==0x53 && /* dataFormat="SPRP" */
89 pInfo
->dataFormat
[1]==0x50 &&
90 pInfo
->dataFormat
[2]==0x52 &&
91 pInfo
->dataFormat
[3]==0x50 &&
92 pInfo
->formatVersion
[0]==3 &&
93 pInfo
->formatVersion
[2]==UTRIE_SHIFT
&&
94 pInfo
->formatVersion
[3]==UTRIE_INDEX_SHIFT
96 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
97 uprv_memcpy(dataVersion
, pInfo
->dataVersion
, 4);
104 static int32_t U_CALLCONV
105 getSPrepFoldingOffset(uint32_t data
) {
107 return (int32_t)data
;
111 /* hashes an entry */
112 static int32_t U_CALLCONV
113 hashEntry(const UHashTok parm
) {
114 UStringPrepKey
*b
= (UStringPrepKey
*)parm
.pointer
;
115 UHashTok namekey
, pathkey
;
116 namekey
.pointer
= b
->name
;
117 pathkey
.pointer
= b
->path
;
118 uint32_t unsignedHash
= static_cast<uint32_t>(uhash_hashChars(namekey
)) +
119 37u * static_cast<uint32_t>(uhash_hashChars(pathkey
));
120 return static_cast<int32_t>(unsignedHash
);
123 /* compares two entries */
124 static UBool U_CALLCONV
125 compareEntries(const UHashTok p1
, const UHashTok p2
) {
126 UStringPrepKey
*b1
= (UStringPrepKey
*)p1
.pointer
;
127 UStringPrepKey
*b2
= (UStringPrepKey
*)p2
.pointer
;
128 UHashTok name1
, name2
, path1
, path2
;
129 name1
.pointer
= b1
->name
;
130 name2
.pointer
= b2
->name
;
131 path1
.pointer
= b1
->path
;
132 path2
.pointer
= b2
->path
;
133 return ((UBool
)(uhash_compareChars(name1
, name2
) &
134 uhash_compareChars(path1
, path2
)));
138 usprep_unload(UStringPrepProfile
* data
){
139 udata_close(data
->sprepData
);
143 usprep_internal_flushCache(UBool noRefCount
){
144 UStringPrepProfile
*profile
= NULL
;
145 UStringPrepKey
*key
= NULL
;
146 int32_t pos
= UHASH_FIRST
;
147 int32_t deletedNum
= 0;
148 const UHashElement
*e
;
151 * if shared data hasn't even been lazy evaluated yet
154 umtx_lock(usprepMutex());
155 if (SHARED_DATA_HASHTABLE
== NULL
) {
156 umtx_unlock(usprepMutex());
160 /*creates an enumeration to iterate through every element in the table */
161 while ((e
= uhash_nextElement(SHARED_DATA_HASHTABLE
, &pos
)) != NULL
)
163 profile
= (UStringPrepProfile
*) e
->value
.pointer
;
164 key
= (UStringPrepKey
*) e
->key
.pointer
;
166 if ((noRefCount
== FALSE
&& profile
->refCount
== 0) ||
169 uhash_removeElement(SHARED_DATA_HASHTABLE
, e
);
171 /* unload the data */
172 usprep_unload(profile
);
174 if(key
->name
!= NULL
) {
175 uprv_free(key
->name
);
178 if(key
->path
!= NULL
) {
179 uprv_free(key
->path
);
187 umtx_unlock(usprepMutex());
/* Works just like ucnv_flushCache()
static int32_t
usprep_flushCache(){
    return usprep_internal_flushCache(FALSE);
}
*/
199 static UBool U_CALLCONV
usprep_cleanup(void){
200 if (SHARED_DATA_HASHTABLE
!= NULL
) {
201 usprep_internal_flushCache(TRUE
);
202 if (SHARED_DATA_HASHTABLE
!= NULL
&& uhash_count(SHARED_DATA_HASHTABLE
) == 0) {
203 uhash_close(SHARED_DATA_HASHTABLE
);
204 SHARED_DATA_HASHTABLE
= NULL
;
207 gSharedDataInitOnce
.reset();
208 return (SHARED_DATA_HASHTABLE
== NULL
);
213 /** Initializes the cache for resources */
214 static void U_CALLCONV
215 createCache(UErrorCode
&status
) {
216 SHARED_DATA_HASHTABLE
= uhash_open(hashEntry
, compareEntries
, NULL
, &status
);
217 if (U_FAILURE(status
)) {
218 SHARED_DATA_HASHTABLE
= NULL
;
220 ucln_common_registerCleanup(UCLN_COMMON_USPREP
, usprep_cleanup
);
224 initCache(UErrorCode
*status
) {
225 umtx_initOnce(gSharedDataInitOnce
, &createCache
, *status
);
228 static UBool U_CALLCONV
229 loadData(UStringPrepProfile
* profile
,
233 UErrorCode
* errorCode
) {
234 /* load Unicode SPREP data from file */
235 UTrie _sprepTrie
={ 0,0,0,0,0,0,0 };
236 UDataMemory
*dataMemory
;
237 const int32_t *p
=NULL
;
239 UVersionInfo normUnicodeVersion
;
240 int32_t normUniVer
, sprepUniVer
, normCorrVer
;
242 if(errorCode
==NULL
|| U_FAILURE(*errorCode
)) {
246 /* open the data outside the mutex block */
247 //TODO: change the path
248 dataMemory
=udata_openChoice(path
, type
, name
, isSPrepAcceptable
, NULL
, errorCode
);
249 if(U_FAILURE(*errorCode
)) {
253 p
=(const int32_t *)udata_getMemory(dataMemory
);
254 pb
=(const uint8_t *)(p
+_SPREP_INDEX_TOP
);
255 utrie_unserialize(&_sprepTrie
, pb
, p
[_SPREP_INDEX_TRIE_SIZE
], errorCode
);
256 _sprepTrie
.getFoldingOffset
=getSPrepFoldingOffset
;
259 if(U_FAILURE(*errorCode
)) {
260 udata_close(dataMemory
);
264 /* in the mutex block, set the data for this process */
265 umtx_lock(usprepMutex());
266 if(profile
->sprepData
==NULL
) {
267 profile
->sprepData
=dataMemory
;
269 uprv_memcpy(&profile
->indexes
, p
, sizeof(profile
->indexes
));
270 uprv_memcpy(&profile
->sprepTrie
, &_sprepTrie
, sizeof(UTrie
));
272 p
=(const int32_t *)udata_getMemory(profile
->sprepData
);
274 umtx_unlock(usprepMutex());
275 /* initialize some variables */
276 profile
->mappingData
=(uint16_t *)((uint8_t *)(p
+_SPREP_INDEX_TOP
)+profile
->indexes
[_SPREP_INDEX_TRIE_SIZE
]);
278 u_getUnicodeVersion(normUnicodeVersion
);
279 normUniVer
= (normUnicodeVersion
[0] << 24) + (normUnicodeVersion
[1] << 16) +
280 (normUnicodeVersion
[2] << 8 ) + (normUnicodeVersion
[3]);
281 sprepUniVer
= (dataVersion
[0] << 24) + (dataVersion
[1] << 16) +
282 (dataVersion
[2] << 8 ) + (dataVersion
[3]);
283 normCorrVer
= profile
->indexes
[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION
];
285 if(U_FAILURE(*errorCode
)){
286 udata_close(dataMemory
);
289 if( normUniVer
< sprepUniVer
&& /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
290 normUniVer
< normCorrVer
&& /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
291 ((profile
->indexes
[_SPREP_OPTIONS
] & _SPREP_NORMALIZATION_ON
) > 0) /* normalization turned on*/
293 *errorCode
= U_INVALID_FORMAT_ERROR
;
294 udata_close(dataMemory
);
297 profile
->isDataLoaded
= TRUE
;
299 /* if a different thread set it first, then close the extra data */
300 if(dataMemory
!=NULL
) {
301 udata_close(dataMemory
); /* NULL if it was set correctly */
305 return profile
->isDataLoaded
;
308 static UStringPrepProfile
*
309 usprep_getProfile(const char* path
,
313 UStringPrepProfile
* profile
= NULL
;
317 if(U_FAILURE(*status
)){
321 UStringPrepKey stackKey
;
323 * const is cast way to save malloc, strcpy and free calls
324 * we use the passed in pointers for fetching the data from the
325 * hash table which is safe
327 stackKey
.name
= (char*) name
;
328 stackKey
.path
= (char*) path
;
330 /* fetch the data from the cache */
331 umtx_lock(usprepMutex());
332 profile
= (UStringPrepProfile
*) (uhash_get(SHARED_DATA_HASHTABLE
,&stackKey
));
333 if(profile
!= NULL
) {
336 umtx_unlock(usprepMutex());
338 if(profile
== NULL
) {
339 /* else load the data and put the data in the cache */
340 LocalMemory
<UStringPrepProfile
> newProfile
;
341 if(newProfile
.allocateInsteadAndReset() == NULL
) {
342 *status
= U_MEMORY_ALLOCATION_ERROR
;
347 if(!loadData(newProfile
.getAlias(), path
, name
, _SPREP_DATA_TYPE
, status
) || U_FAILURE(*status
) ){
351 /* get the options */
352 newProfile
->doNFKC
= (UBool
)((newProfile
->indexes
[_SPREP_OPTIONS
] & _SPREP_NORMALIZATION_ON
) > 0);
353 newProfile
->checkBiDi
= (UBool
)((newProfile
->indexes
[_SPREP_OPTIONS
] & _SPREP_CHECK_BIDI_ON
) > 0);
355 LocalMemory
<UStringPrepKey
> key
;
356 LocalMemory
<char> keyName
;
357 LocalMemory
<char> keyPath
;
358 if( key
.allocateInsteadAndReset() == NULL
||
359 keyName
.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name
)+1)) == NULL
||
361 keyPath
.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path
)+1)) == NULL
)
363 *status
= U_MEMORY_ALLOCATION_ERROR
;
364 usprep_unload(newProfile
.getAlias());
368 umtx_lock(usprepMutex());
369 // If another thread already inserted the same key/value, refcount and cleanup our thread data
370 profile
= (UStringPrepProfile
*) (uhash_get(SHARED_DATA_HASHTABLE
,&stackKey
));
371 if(profile
!= NULL
) {
373 usprep_unload(newProfile
.getAlias());
376 /* initialize the key members */
377 key
->name
= keyName
.orphan();
378 uprv_strcpy(key
->name
, name
);
380 key
->path
= keyPath
.orphan();
381 uprv_strcpy(key
->path
, path
);
383 profile
= newProfile
.orphan();
385 /* add the data object to the cache */
386 profile
->refCount
= 1;
387 uhash_put(SHARED_DATA_HASHTABLE
, key
.orphan(), profile
, status
);
389 umtx_unlock(usprepMutex());
395 U_CAPI UStringPrepProfile
* U_EXPORT2
396 usprep_open(const char* path
,
400 if(status
== NULL
|| U_FAILURE(*status
)){
404 /* initialize the profile struct members */
405 return usprep_getProfile(path
,name
,status
);
408 U_CAPI UStringPrepProfile
* U_EXPORT2
409 usprep_openByType(UStringPrepProfileType type
,
410 UErrorCode
* status
) {
411 if(status
== NULL
|| U_FAILURE(*status
)){
414 int32_t index
= (int32_t)type
;
415 if (index
< 0 || index
>= UPRV_LENGTHOF(PROFILE_NAMES
)) {
416 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
419 return usprep_open(NULL
, PROFILE_NAMES
[index
], status
);
422 U_CAPI
void U_EXPORT2
423 usprep_close(UStringPrepProfile
* profile
){
428 umtx_lock(usprepMutex());
429 /* decrement the ref count*/
430 if(profile
->refCount
> 0){
433 umtx_unlock(usprepMutex());
438 uprv_syntaxError(const UChar
* rules
,
441 UParseError
* parseError
){
442 if(parseError
== NULL
){
445 parseError
->offset
= pos
;
446 parseError
->line
= 0 ; // we are not using line numbers
449 int32_t start
= (pos
< U_PARSE_CONTEXT_LEN
)? 0 : (pos
- (U_PARSE_CONTEXT_LEN
-1));
452 u_memcpy(parseError
->preContext
,rules
+start
,limit
-start
);
453 //null terminate the buffer
454 parseError
->preContext
[limit
-start
] = 0;
456 // for post-context; include error rules[pos]
458 limit
= start
+ (U_PARSE_CONTEXT_LEN
-1);
459 if (limit
> rulesLen
) {
462 if (start
< rulesLen
) {
463 u_memcpy(parseError
->postContext
,rules
+start
,limit
-start
);
465 //null terminate the buffer
466 parseError
->postContext
[limit
-start
]= 0;
470 static inline UStringPrepType
471 getValues(uint16_t trieWord
, int16_t& value
, UBool
& isIndex
){
473 UStringPrepType type
;
476 * Initial value stored in the mapping table
477 * just return USPREP_TYPE_LIMIT .. so that
478 * the source codepoint is copied to the destination
480 type
= USPREP_TYPE_LIMIT
;
483 }else if(trieWord
>= _SPREP_TYPE_THRESHOLD
){
484 type
= (UStringPrepType
) (trieWord
- _SPREP_TYPE_THRESHOLD
);
490 /* ascertain if the value is index or delta */
493 value
= trieWord
>> 2; //mask off the lower 2 bits and shift
496 value
= (int16_t)trieWord
;
497 value
= (value
>> 2);
500 if((trieWord
>>2) == _SPREP_MAX_INDEX_VALUE
){
501 type
= USPREP_DELETE
;
509 // TODO: change to writing to UnicodeString not UChar *
511 usprep_map( const UStringPrepProfile
* profile
,
512 const UChar
* src
, int32_t srcLength
,
513 UChar
* dest
, int32_t destCapacity
,
515 UParseError
* parseError
,
516 UErrorCode
* status
){
521 UBool allowUnassigned
= (UBool
) ((options
& USPREP_ALLOW_UNASSIGNED
)>0);
522 UStringPrepType type
;
525 const int32_t* indexes
= profile
->indexes
;
527 // no error checking the caller check for error and arguments
528 // no string length check the caller finds out the string length
530 for(srcIndex
=0;srcIndex
<srcLength
;){
533 U16_NEXT(src
,srcIndex
,srcLength
,ch
);
537 UTRIE_GET16(&profile
->sprepTrie
,ch
,result
);
539 type
= getValues(result
, value
, isIndex
);
541 // check if the source codepoint is unassigned
542 if(type
== USPREP_UNASSIGNED
&& allowUnassigned
== FALSE
){
544 uprv_syntaxError(src
,srcIndex
-U16_LENGTH(ch
), srcLength
,parseError
);
545 *status
= U_STRINGPREP_UNASSIGNED_ERROR
;
548 }else if(type
== USPREP_MAP
){
550 int32_t index
, length
;
554 if(index
>= indexes
[_SPREP_ONE_UCHAR_MAPPING_INDEX_START
] &&
555 index
< indexes
[_SPREP_TWO_UCHARS_MAPPING_INDEX_START
]){
557 }else if(index
>= indexes
[_SPREP_TWO_UCHARS_MAPPING_INDEX_START
] &&
558 index
< indexes
[_SPREP_THREE_UCHARS_MAPPING_INDEX_START
]){
560 }else if(index
>= indexes
[_SPREP_THREE_UCHARS_MAPPING_INDEX_START
] &&
561 index
< indexes
[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START
]){
564 length
= profile
->mappingData
[index
++];
568 /* copy mapping to destination */
569 for(int32_t i
=0; i
< length
; i
++){
570 if(destIndex
< destCapacity
){
571 dest
[destIndex
] = profile
->mappingData
[index
+i
];
573 destIndex
++; /* for pre-flighting */
577 // subtract the delta to arrive at the code point
581 }else if(type
==USPREP_DELETE
){
582 // just consume the codepoint and contine
585 //copy the code point into destination
587 if(destIndex
< destCapacity
){
588 dest
[destIndex
] = (UChar
)ch
;
592 if(destIndex
+1 < destCapacity
){
593 dest
[destIndex
] = U16_LEAD(ch
);
594 dest
[destIndex
+1] = U16_TRAIL(ch
);
601 return u_terminateUChars(dest
, destCapacity
, destIndex
, status
);
/*
   1) Map -- For each character in the input, check if it has a mapping
      and, if so, replace it with its mapping.

   2) Normalize -- Possibly normalize the result of step 1 using Unicode
      normalization.

   3) Prohibit -- Check for any characters that are not allowed in the
      output.  If any are found, return an error.

   4) Check bidi -- Possibly check for right-to-left characters, and if
      any are found, make sure that the whole string satisfies the
      requirements for bidirectional strings.  If the string does not
      satisfy the requirements for bidirectional strings, return an
      error.

   [Unicode3.2] defines several bidirectional categories; each character
   has one bidirectional category assigned to it.  For the purposes of
   the requirements below, an "RandALCat character" is a character that
   has Unicode bidirectional categories "R" or "AL"; an "LCat character"
   is a character that has Unicode bidirectional category "L".  Note
   that there are many characters which fall in neither of the above
   definitions; Latin digits (<U+0030> through <U+0039>) are examples of
   this because they have bidirectional category "EN".

   In any profile that specifies bidirectional character handling, all
   three of the following requirements MUST be met:

   1) The characters in section 5.8 MUST be prohibited.

   2) If a string contains any RandALCat character, the string MUST NOT
      contain any LCat character.

   3) If a string contains any RandALCat character, a RandALCat
      character MUST be the first character of the string, and a
      RandALCat character MUST be the last character of the string.
*/
642 U_CAPI
int32_t U_EXPORT2
643 usprep_prepare( const UStringPrepProfile
* profile
,
644 const UChar
* src
, int32_t srcLength
,
645 UChar
* dest
, int32_t destCapacity
,
647 UParseError
* parseError
,
648 UErrorCode
* status
){
650 // check error status
651 if(U_FAILURE(*status
)){
657 (src
==NULL
? srcLength
!=0 : srcLength
<-1) ||
658 (dest
==NULL
? destCapacity
!=0 : destCapacity
<0)) {
659 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
663 //get the string length
665 srcLength
= u_strlen(src
);
669 UChar
*b1
= s1
.getBuffer(srcLength
);
671 *status
= U_MEMORY_ALLOCATION_ERROR
;
674 int32_t b1Len
= usprep_map(profile
, src
, srcLength
,
675 b1
, s1
.getCapacity(), options
, parseError
, status
);
676 s1
.releaseBuffer(U_SUCCESS(*status
) ? b1Len
: 0);
678 if(*status
== U_BUFFER_OVERFLOW_ERROR
){
679 // redo processing of string
680 /* we do not have enough room so grow the buffer*/
681 b1
= s1
.getBuffer(b1Len
);
683 *status
= U_MEMORY_ALLOCATION_ERROR
;
687 *status
= U_ZERO_ERROR
; // reset error
688 b1Len
= usprep_map(profile
, src
, srcLength
,
689 b1
, s1
.getCapacity(), options
, parseError
, status
);
690 s1
.releaseBuffer(U_SUCCESS(*status
) ? b1Len
: 0);
692 if(U_FAILURE(*status
)){
699 const Normalizer2
*n2
= Normalizer2::getNFKCInstance(*status
);
700 FilteredNormalizer2
fn2(*n2
, *uniset_getUnicode32Instance(*status
));
701 if(U_FAILURE(*status
)){
704 fn2
.normalize(s1
, s2
, *status
);
708 if(U_FAILURE(*status
)){
712 // Prohibit and checkBiDi in one pass
713 const UChar
*b2
= s2
.getBuffer();
714 int32_t b2Len
= s2
.length();
715 UCharDirection direction
=U_CHAR_DIRECTION_COUNT
, firstCharDir
=U_CHAR_DIRECTION_COUNT
;
716 UBool leftToRight
=FALSE
, rightToLeft
=FALSE
;
717 int32_t rtlPos
=-1, ltrPos
=-1;
719 for(int32_t b2Index
=0; b2Index
<b2Len
;){
721 U16_NEXT(b2
, b2Index
, b2Len
, ch
);
724 UTRIE_GET16(&profile
->sprepTrie
,ch
,result
);
728 UStringPrepType type
= getValues(result
, value
, isIndex
);
730 if( type
== USPREP_PROHIBITED
||
731 ((result
< _SPREP_TYPE_THRESHOLD
) && (result
& 0x01) /* first bit says it the code point is prohibited*/)
733 *status
= U_STRINGPREP_PROHIBITED_ERROR
;
734 uprv_syntaxError(b2
, b2Index
-U16_LENGTH(ch
), b2Len
, parseError
);
738 if(profile
->checkBiDi
) {
739 direction
= ubidi_getClass(ch
);
740 if(firstCharDir
== U_CHAR_DIRECTION_COUNT
){
741 firstCharDir
= direction
;
743 if(direction
== U_LEFT_TO_RIGHT
){
747 if(direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
){
753 if(profile
->checkBiDi
== TRUE
){
755 if( leftToRight
== TRUE
&& rightToLeft
== TRUE
){
756 *status
= U_STRINGPREP_CHECK_BIDI_ERROR
;
757 uprv_syntaxError(b2
,(rtlPos
>ltrPos
) ? rtlPos
: ltrPos
, b2Len
, parseError
);
762 if( rightToLeft
== TRUE
&&
763 !((firstCharDir
== U_RIGHT_TO_LEFT
|| firstCharDir
== U_RIGHT_TO_LEFT_ARABIC
) &&
764 (direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
))
766 *status
= U_STRINGPREP_CHECK_BIDI_ERROR
;
767 uprv_syntaxError(b2
, rtlPos
, b2Len
, parseError
);
771 return s2
.extract(dest
, destCapacity
, *status
);
775 /* data swapping ------------------------------------------------------------ */
777 U_CAPI
int32_t U_EXPORT2
778 usprep_swap(const UDataSwapper
*ds
,
779 const void *inData
, int32_t length
, void *outData
,
780 UErrorCode
*pErrorCode
) {
781 const UDataInfo
*pInfo
;
784 const uint8_t *inBytes
;
787 const int32_t *inIndexes
;
790 int32_t i
, offset
, count
, size
;
792 /* udata_swapDataHeader checks the arguments */
793 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
794 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
798 /* check data format and format version */
799 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
801 pInfo
->dataFormat
[0]==0x53 && /* dataFormat="SPRP" */
802 pInfo
->dataFormat
[1]==0x50 &&
803 pInfo
->dataFormat
[2]==0x52 &&
804 pInfo
->dataFormat
[3]==0x50 &&
805 pInfo
->formatVersion
[0]==3
807 udata_printError(ds
, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
808 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
809 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
810 pInfo
->formatVersion
[0]);
811 *pErrorCode
=U_UNSUPPORTED_ERROR
;
815 inBytes
=(const uint8_t *)inData
+headerSize
;
816 outBytes
=(uint8_t *)outData
+headerSize
;
818 inIndexes
=(const int32_t *)inBytes
;
823 udata_printError(ds
, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
825 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
830 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
831 for(i
=0; i
<16; ++i
) {
832 indexes
[i
]=udata_readInt32(ds
, inIndexes
[i
]);
835 /* calculate the total length of the data */
837 16*4+ /* size of indexes[] */
838 indexes
[_SPREP_INDEX_TRIE_SIZE
]+
839 indexes
[_SPREP_INDEX_MAPPING_DATA_SIZE
];
843 udata_printError(ds
, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
845 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
849 /* copy the data for inaccessible bytes */
850 if(inBytes
!=outBytes
) {
851 uprv_memcpy(outBytes
, inBytes
, size
);
856 /* swap the int32_t indexes[] */
858 ds
->swapArray32(ds
, inBytes
, count
, outBytes
, pErrorCode
);
862 count
=indexes
[_SPREP_INDEX_TRIE_SIZE
];
863 utrie_swap(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
866 /* swap the uint16_t mappingTable[] */
867 count
=indexes
[_SPREP_INDEX_MAPPING_DATA_SIZE
];
868 ds
->swapArray16(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
872 return headerSize
+size
;
875 #endif /* #if !UCONFIG_NO_IDNA */