// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 *
 *   Copyright (C) 2003-2016, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  usprep.cpp
 *   tab size:   8 (not used)
 *
 *   created on: 2003jul2
 *   created by: Ram Viswanadha
 */
#include "unicode/utypes.h"

#if !UCONFIG_NO_IDNA

#include "unicode/usprep.h"

#include "unicode/normalizer2.h"
#include "unicode/uniset.h"
#include "unicode/ustring.h"
#include "unicode/uchar.h"
#include "unicode/uversion.h"
#include "umutex.h"
#include "cmemory.h"
#include "sprpimpl.h"
#include "ustr_imp.h"
#include "uhash.h"
#include "cstring.h"
#include "udataswp.h"
#include "ucln_cmn.h"
#include "ubidi_props.h"
#include "uprops.h"

U_NAMESPACE_USE
45 Static cache for already opened StringPrep profiles
47 static UHashtable
*SHARED_DATA_HASHTABLE
= NULL
;
48 static icu::UInitOnce gSharedDataInitOnce
;
50 static UMutex usprepMutex
= U_MUTEX_INITIALIZER
;
52 /* format version of spp file */
53 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
55 /* the Unicode version of the sprep data */
56 static UVersionInfo dataVersion
={ 0, 0, 0, 0 };
/*
 * Data file names of the built-in profiles, indexed by UStringPrepProfileType.
 * Profile names must be aligned to UStringPrepProfileType: usprep_openByType()
 * uses the enum value directly as an index into this table.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NSF4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
76 static UBool U_CALLCONV
77 isSPrepAcceptable(void * /* context */,
78 const char * /* type */,
79 const char * /* name */,
80 const UDataInfo
*pInfo
) {
83 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
84 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
85 pInfo
->dataFormat
[0]==0x53 && /* dataFormat="SPRP" */
86 pInfo
->dataFormat
[1]==0x50 &&
87 pInfo
->dataFormat
[2]==0x52 &&
88 pInfo
->dataFormat
[3]==0x50 &&
89 pInfo
->formatVersion
[0]==3 &&
90 pInfo
->formatVersion
[2]==UTRIE_SHIFT
&&
91 pInfo
->formatVersion
[3]==UTRIE_INDEX_SHIFT
93 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
94 uprv_memcpy(dataVersion
, pInfo
->dataVersion
, 4);
101 static int32_t U_CALLCONV
102 getSPrepFoldingOffset(uint32_t data
) {
104 return (int32_t)data
;
108 /* hashes an entry */
109 static int32_t U_CALLCONV
110 hashEntry(const UHashTok parm
) {
111 UStringPrepKey
*b
= (UStringPrepKey
*)parm
.pointer
;
112 UHashTok namekey
, pathkey
;
113 namekey
.pointer
= b
->name
;
114 pathkey
.pointer
= b
->path
;
115 return uhash_hashChars(namekey
)+37*uhash_hashChars(pathkey
);
118 /* compares two entries */
119 static UBool U_CALLCONV
120 compareEntries(const UHashTok p1
, const UHashTok p2
) {
121 UStringPrepKey
*b1
= (UStringPrepKey
*)p1
.pointer
;
122 UStringPrepKey
*b2
= (UStringPrepKey
*)p2
.pointer
;
123 UHashTok name1
, name2
, path1
, path2
;
124 name1
.pointer
= b1
->name
;
125 name2
.pointer
= b2
->name
;
126 path1
.pointer
= b1
->path
;
127 path2
.pointer
= b2
->path
;
128 return ((UBool
)(uhash_compareChars(name1
, name2
) &
129 uhash_compareChars(path1
, path2
)));
133 usprep_unload(UStringPrepProfile
* data
){
134 udata_close(data
->sprepData
);
138 usprep_internal_flushCache(UBool noRefCount
){
139 UStringPrepProfile
*profile
= NULL
;
140 UStringPrepKey
*key
= NULL
;
141 int32_t pos
= UHASH_FIRST
;
142 int32_t deletedNum
= 0;
143 const UHashElement
*e
;
146 * if shared data hasn't even been lazy evaluated yet
149 umtx_lock(&usprepMutex
);
150 if (SHARED_DATA_HASHTABLE
== NULL
) {
151 umtx_unlock(&usprepMutex
);
155 /*creates an enumeration to iterate through every element in the table */
156 while ((e
= uhash_nextElement(SHARED_DATA_HASHTABLE
, &pos
)) != NULL
)
158 profile
= (UStringPrepProfile
*) e
->value
.pointer
;
159 key
= (UStringPrepKey
*) e
->key
.pointer
;
161 if ((noRefCount
== FALSE
&& profile
->refCount
== 0) ||
164 uhash_removeElement(SHARED_DATA_HASHTABLE
, e
);
166 /* unload the data */
167 usprep_unload(profile
);
169 if(key
->name
!= NULL
) {
170 uprv_free(key
->name
);
173 if(key
->path
!= NULL
) {
174 uprv_free(key
->path
);
182 umtx_unlock(&usprepMutex
);
/* Works just like ucnv_flushCache()
static int32_t
usprep_flushCache(){
    return usprep_internal_flushCache(FALSE);
}
*/
194 static UBool U_CALLCONV
usprep_cleanup(void){
195 if (SHARED_DATA_HASHTABLE
!= NULL
) {
196 usprep_internal_flushCache(TRUE
);
197 if (SHARED_DATA_HASHTABLE
!= NULL
&& uhash_count(SHARED_DATA_HASHTABLE
) == 0) {
198 uhash_close(SHARED_DATA_HASHTABLE
);
199 SHARED_DATA_HASHTABLE
= NULL
;
202 gSharedDataInitOnce
.reset();
203 return (SHARED_DATA_HASHTABLE
== NULL
);
208 /** Initializes the cache for resources */
209 static void U_CALLCONV
210 createCache(UErrorCode
&status
) {
211 SHARED_DATA_HASHTABLE
= uhash_open(hashEntry
, compareEntries
, NULL
, &status
);
212 if (U_FAILURE(status
)) {
213 SHARED_DATA_HASHTABLE
= NULL
;
215 ucln_common_registerCleanup(UCLN_COMMON_USPREP
, usprep_cleanup
);
219 initCache(UErrorCode
*status
) {
220 umtx_initOnce(gSharedDataInitOnce
, &createCache
, *status
);
223 static UBool U_CALLCONV
224 loadData(UStringPrepProfile
* profile
,
228 UErrorCode
* errorCode
) {
229 /* load Unicode SPREP data from file */
230 UTrie _sprepTrie
={ 0,0,0,0,0,0,0 };
231 UDataMemory
*dataMemory
;
232 const int32_t *p
=NULL
;
234 UVersionInfo normUnicodeVersion
;
235 int32_t normUniVer
, sprepUniVer
, normCorrVer
;
237 if(errorCode
==NULL
|| U_FAILURE(*errorCode
)) {
241 /* open the data outside the mutex block */
242 //TODO: change the path
243 dataMemory
=udata_openChoice(path
, type
, name
, isSPrepAcceptable
, NULL
, errorCode
);
244 if(U_FAILURE(*errorCode
)) {
248 p
=(const int32_t *)udata_getMemory(dataMemory
);
249 pb
=(const uint8_t *)(p
+_SPREP_INDEX_TOP
);
250 utrie_unserialize(&_sprepTrie
, pb
, p
[_SPREP_INDEX_TRIE_SIZE
], errorCode
);
251 _sprepTrie
.getFoldingOffset
=getSPrepFoldingOffset
;
254 if(U_FAILURE(*errorCode
)) {
255 udata_close(dataMemory
);
259 /* in the mutex block, set the data for this process */
260 umtx_lock(&usprepMutex
);
261 if(profile
->sprepData
==NULL
) {
262 profile
->sprepData
=dataMemory
;
264 uprv_memcpy(&profile
->indexes
, p
, sizeof(profile
->indexes
));
265 uprv_memcpy(&profile
->sprepTrie
, &_sprepTrie
, sizeof(UTrie
));
267 p
=(const int32_t *)udata_getMemory(profile
->sprepData
);
269 umtx_unlock(&usprepMutex
);
270 /* initialize some variables */
271 profile
->mappingData
=(uint16_t *)((uint8_t *)(p
+_SPREP_INDEX_TOP
)+profile
->indexes
[_SPREP_INDEX_TRIE_SIZE
]);
273 u_getUnicodeVersion(normUnicodeVersion
);
274 normUniVer
= (normUnicodeVersion
[0] << 24) + (normUnicodeVersion
[1] << 16) +
275 (normUnicodeVersion
[2] << 8 ) + (normUnicodeVersion
[3]);
276 sprepUniVer
= (dataVersion
[0] << 24) + (dataVersion
[1] << 16) +
277 (dataVersion
[2] << 8 ) + (dataVersion
[3]);
278 normCorrVer
= profile
->indexes
[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION
];
280 if(U_FAILURE(*errorCode
)){
281 udata_close(dataMemory
);
284 if( normUniVer
< sprepUniVer
&& /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
285 normUniVer
< normCorrVer
&& /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
286 ((profile
->indexes
[_SPREP_OPTIONS
] & _SPREP_NORMALIZATION_ON
) > 0) /* normalization turned on*/
288 *errorCode
= U_INVALID_FORMAT_ERROR
;
289 udata_close(dataMemory
);
292 profile
->isDataLoaded
= TRUE
;
294 /* if a different thread set it first, then close the extra data */
295 if(dataMemory
!=NULL
) {
296 udata_close(dataMemory
); /* NULL if it was set correctly */
300 return profile
->isDataLoaded
;
303 static UStringPrepProfile
*
304 usprep_getProfile(const char* path
,
308 UStringPrepProfile
* profile
= NULL
;
312 if(U_FAILURE(*status
)){
316 UStringPrepKey stackKey
;
318 * const is cast way to save malloc, strcpy and free calls
319 * we use the passed in pointers for fetching the data from the
320 * hash table which is safe
322 stackKey
.name
= (char*) name
;
323 stackKey
.path
= (char*) path
;
325 /* fetch the data from the cache */
326 umtx_lock(&usprepMutex
);
327 profile
= (UStringPrepProfile
*) (uhash_get(SHARED_DATA_HASHTABLE
,&stackKey
));
328 if(profile
!= NULL
) {
331 umtx_unlock(&usprepMutex
);
333 if(profile
== NULL
) {
334 /* else load the data and put the data in the cache */
335 LocalMemory
<UStringPrepProfile
> newProfile
;
336 if(newProfile
.allocateInsteadAndReset() == NULL
) {
337 *status
= U_MEMORY_ALLOCATION_ERROR
;
342 if(!loadData(newProfile
.getAlias(), path
, name
, _SPREP_DATA_TYPE
, status
) || U_FAILURE(*status
) ){
346 /* get the options */
347 newProfile
->doNFKC
= (UBool
)((newProfile
->indexes
[_SPREP_OPTIONS
] & _SPREP_NORMALIZATION_ON
) > 0);
348 newProfile
->checkBiDi
= (UBool
)((newProfile
->indexes
[_SPREP_OPTIONS
] & _SPREP_CHECK_BIDI_ON
) > 0);
350 LocalMemory
<UStringPrepKey
> key
;
351 LocalMemory
<char> keyName
;
352 LocalMemory
<char> keyPath
;
353 if( key
.allocateInsteadAndReset() == NULL
||
354 keyName
.allocateInsteadAndCopy(uprv_strlen(name
)+1) == NULL
||
356 keyPath
.allocateInsteadAndCopy(uprv_strlen(path
)+1) == NULL
)
358 *status
= U_MEMORY_ALLOCATION_ERROR
;
359 usprep_unload(newProfile
.getAlias());
363 umtx_lock(&usprepMutex
);
364 // If another thread already inserted the same key/value, refcount and cleanup our thread data
365 profile
= (UStringPrepProfile
*) (uhash_get(SHARED_DATA_HASHTABLE
,&stackKey
));
366 if(profile
!= NULL
) {
368 usprep_unload(newProfile
.getAlias());
371 /* initialize the key members */
372 key
->name
= keyName
.orphan();
373 uprv_strcpy(key
->name
, name
);
375 key
->path
= keyPath
.orphan();
376 uprv_strcpy(key
->path
, path
);
378 profile
= newProfile
.orphan();
380 /* add the data object to the cache */
381 profile
->refCount
= 1;
382 uhash_put(SHARED_DATA_HASHTABLE
, key
.orphan(), profile
, status
);
384 umtx_unlock(&usprepMutex
);
390 U_CAPI UStringPrepProfile
* U_EXPORT2
391 usprep_open(const char* path
,
395 if(status
== NULL
|| U_FAILURE(*status
)){
399 /* initialize the profile struct members */
400 return usprep_getProfile(path
,name
,status
);
403 U_CAPI UStringPrepProfile
* U_EXPORT2
404 usprep_openByType(UStringPrepProfileType type
,
405 UErrorCode
* status
) {
406 if(status
== NULL
|| U_FAILURE(*status
)){
409 int32_t index
= (int32_t)type
;
410 if (index
< 0 || index
>= UPRV_LENGTHOF(PROFILE_NAMES
)) {
411 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
414 return usprep_open(NULL
, PROFILE_NAMES
[index
], status
);
417 U_CAPI
void U_EXPORT2
418 usprep_close(UStringPrepProfile
* profile
){
423 umtx_lock(&usprepMutex
);
424 /* decrement the ref count*/
425 if(profile
->refCount
> 0){
428 umtx_unlock(&usprepMutex
);
433 uprv_syntaxError(const UChar
* rules
,
436 UParseError
* parseError
){
437 if(parseError
== NULL
){
440 parseError
->offset
= pos
;
441 parseError
->line
= 0 ; // we are not using line numbers
444 int32_t start
= (pos
< U_PARSE_CONTEXT_LEN
)? 0 : (pos
- (U_PARSE_CONTEXT_LEN
-1));
447 u_memcpy(parseError
->preContext
,rules
+start
,limit
-start
);
448 //null terminate the buffer
449 parseError
->preContext
[limit
-start
] = 0;
451 // for post-context; include error rules[pos]
453 limit
= start
+ (U_PARSE_CONTEXT_LEN
-1);
454 if (limit
> rulesLen
) {
457 if (start
< rulesLen
) {
458 u_memcpy(parseError
->postContext
,rules
+start
,limit
-start
);
460 //null terminate the buffer
461 parseError
->postContext
[limit
-start
]= 0;
465 static inline UStringPrepType
466 getValues(uint16_t trieWord
, int16_t& value
, UBool
& isIndex
){
468 UStringPrepType type
;
471 * Initial value stored in the mapping table
472 * just return USPREP_TYPE_LIMIT .. so that
473 * the source codepoint is copied to the destination
475 type
= USPREP_TYPE_LIMIT
;
478 }else if(trieWord
>= _SPREP_TYPE_THRESHOLD
){
479 type
= (UStringPrepType
) (trieWord
- _SPREP_TYPE_THRESHOLD
);
485 /* ascertain if the value is index or delta */
488 value
= trieWord
>> 2; //mask off the lower 2 bits and shift
491 value
= (int16_t)trieWord
;
492 value
= (value
>> 2);
495 if((trieWord
>>2) == _SPREP_MAX_INDEX_VALUE
){
496 type
= USPREP_DELETE
;
504 // TODO: change to writing to UnicodeString not UChar *
506 usprep_map( const UStringPrepProfile
* profile
,
507 const UChar
* src
, int32_t srcLength
,
508 UChar
* dest
, int32_t destCapacity
,
510 UParseError
* parseError
,
511 UErrorCode
* status
){
516 UBool allowUnassigned
= (UBool
) ((options
& USPREP_ALLOW_UNASSIGNED
)>0);
517 UStringPrepType type
;
520 const int32_t* indexes
= profile
->indexes
;
522 // no error checking the caller check for error and arguments
523 // no string length check the caller finds out the string length
525 for(srcIndex
=0;srcIndex
<srcLength
;){
528 U16_NEXT(src
,srcIndex
,srcLength
,ch
);
532 UTRIE_GET16(&profile
->sprepTrie
,ch
,result
);
534 type
= getValues(result
, value
, isIndex
);
536 // check if the source codepoint is unassigned
537 if(type
== USPREP_UNASSIGNED
&& allowUnassigned
== FALSE
){
539 uprv_syntaxError(src
,srcIndex
-U16_LENGTH(ch
), srcLength
,parseError
);
540 *status
= U_STRINGPREP_UNASSIGNED_ERROR
;
543 }else if(type
== USPREP_MAP
){
545 int32_t index
, length
;
549 if(index
>= indexes
[_SPREP_ONE_UCHAR_MAPPING_INDEX_START
] &&
550 index
< indexes
[_SPREP_TWO_UCHARS_MAPPING_INDEX_START
]){
552 }else if(index
>= indexes
[_SPREP_TWO_UCHARS_MAPPING_INDEX_START
] &&
553 index
< indexes
[_SPREP_THREE_UCHARS_MAPPING_INDEX_START
]){
555 }else if(index
>= indexes
[_SPREP_THREE_UCHARS_MAPPING_INDEX_START
] &&
556 index
< indexes
[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START
]){
559 length
= profile
->mappingData
[index
++];
563 /* copy mapping to destination */
564 for(int32_t i
=0; i
< length
; i
++){
565 if(destIndex
< destCapacity
){
566 dest
[destIndex
] = profile
->mappingData
[index
+i
];
568 destIndex
++; /* for pre-flighting */
572 // subtract the delta to arrive at the code point
576 }else if(type
==USPREP_DELETE
){
577 // just consume the codepoint and contine
580 //copy the code point into destination
582 if(destIndex
< destCapacity
){
583 dest
[destIndex
] = (UChar
)ch
;
587 if(destIndex
+1 < destCapacity
){
588 dest
[destIndex
] = U16_LEAD(ch
);
589 dest
[destIndex
+1] = U16_TRAIL(ch
);
596 return u_terminateUChars(dest
, destCapacity
, destIndex
, status
);
600 1) Map -- For each character in the input, check if it has a mapping
601 and, if so, replace it with its mapping.
603 2) Normalize -- Possibly normalize the result of step 1 using Unicode
606 3) Prohibit -- Check for any characters that are not allowed in the
607 output. If any are found, return an error.
609 4) Check bidi -- Possibly check for right-to-left characters, and if
610 any are found, make sure that the whole string satisfies the
611 requirements for bidirectional strings. If the string does not
612 satisfy the requirements for bidirectional strings, return an
614 [Unicode3.2] defines several bidirectional categories; each character
615 has one bidirectional category assigned to it. For the purposes of
616 the requirements below, an "RandALCat character" is a character that
617 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
618 is a character that has Unicode bidirectional category "L". Note
621 that there are many characters which fall in neither of the above
622 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
623 this because they have bidirectional category "EN".
625 In any profile that specifies bidirectional character handling, all
626 three of the following requirements MUST be met:
628 1) The characters in section 5.8 MUST be prohibited.
630 2) If a string contains any RandALCat character, the string MUST NOT
631 contain any LCat character.
633 3) If a string contains any RandALCat character, a RandALCat
634 character MUST be the first character of the string, and a
635 RandALCat character MUST be the last character of the string.
637 U_CAPI
int32_t U_EXPORT2
638 usprep_prepare( const UStringPrepProfile
* profile
,
639 const UChar
* src
, int32_t srcLength
,
640 UChar
* dest
, int32_t destCapacity
,
642 UParseError
* parseError
,
643 UErrorCode
* status
){
645 // check error status
646 if(U_FAILURE(*status
)){
652 (src
==NULL
? srcLength
!=0 : srcLength
<-1) ||
653 (dest
==NULL
? destCapacity
!=0 : destCapacity
<0)) {
654 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
658 //get the string length
660 srcLength
= u_strlen(src
);
664 UChar
*b1
= s1
.getBuffer(srcLength
);
666 *status
= U_MEMORY_ALLOCATION_ERROR
;
669 int32_t b1Len
= usprep_map(profile
, src
, srcLength
,
670 b1
, s1
.getCapacity(), options
, parseError
, status
);
671 s1
.releaseBuffer(U_SUCCESS(*status
) ? b1Len
: 0);
673 if(*status
== U_BUFFER_OVERFLOW_ERROR
){
674 // redo processing of string
675 /* we do not have enough room so grow the buffer*/
676 b1
= s1
.getBuffer(b1Len
);
678 *status
= U_MEMORY_ALLOCATION_ERROR
;
682 *status
= U_ZERO_ERROR
; // reset error
683 b1Len
= usprep_map(profile
, src
, srcLength
,
684 b1
, s1
.getCapacity(), options
, parseError
, status
);
685 s1
.releaseBuffer(U_SUCCESS(*status
) ? b1Len
: 0);
687 if(U_FAILURE(*status
)){
694 const Normalizer2
*n2
= Normalizer2::getNFKCInstance(*status
);
695 FilteredNormalizer2
fn2(*n2
, *uniset_getUnicode32Instance(*status
));
696 if(U_FAILURE(*status
)){
699 fn2
.normalize(s1
, s2
, *status
);
703 if(U_FAILURE(*status
)){
707 // Prohibit and checkBiDi in one pass
708 const UChar
*b2
= s2
.getBuffer();
709 int32_t b2Len
= s2
.length();
710 UCharDirection direction
=U_CHAR_DIRECTION_COUNT
, firstCharDir
=U_CHAR_DIRECTION_COUNT
;
711 UBool leftToRight
=FALSE
, rightToLeft
=FALSE
;
712 int32_t rtlPos
=-1, ltrPos
=-1;
714 for(int32_t b2Index
=0; b2Index
<b2Len
;){
716 U16_NEXT(b2
, b2Index
, b2Len
, ch
);
719 UTRIE_GET16(&profile
->sprepTrie
,ch
,result
);
723 UStringPrepType type
= getValues(result
, value
, isIndex
);
725 if( type
== USPREP_PROHIBITED
||
726 ((result
< _SPREP_TYPE_THRESHOLD
) && (result
& 0x01) /* first bit says it the code point is prohibited*/)
728 *status
= U_STRINGPREP_PROHIBITED_ERROR
;
729 uprv_syntaxError(b1
, b2Index
-U16_LENGTH(ch
), b2Len
, parseError
);
733 if(profile
->checkBiDi
) {
734 direction
= ubidi_getClass(ch
);
735 if(firstCharDir
== U_CHAR_DIRECTION_COUNT
){
736 firstCharDir
= direction
;
738 if(direction
== U_LEFT_TO_RIGHT
){
742 if(direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
){
748 if(profile
->checkBiDi
== TRUE
){
750 if( leftToRight
== TRUE
&& rightToLeft
== TRUE
){
751 *status
= U_STRINGPREP_CHECK_BIDI_ERROR
;
752 uprv_syntaxError(b2
,(rtlPos
>ltrPos
) ? rtlPos
: ltrPos
, b2Len
, parseError
);
757 if( rightToLeft
== TRUE
&&
758 !((firstCharDir
== U_RIGHT_TO_LEFT
|| firstCharDir
== U_RIGHT_TO_LEFT_ARABIC
) &&
759 (direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
))
761 *status
= U_STRINGPREP_CHECK_BIDI_ERROR
;
762 uprv_syntaxError(b2
, rtlPos
, b2Len
, parseError
);
766 return s2
.extract(dest
, destCapacity
, *status
);
770 /* data swapping ------------------------------------------------------------ */
772 U_CAPI
int32_t U_EXPORT2
773 usprep_swap(const UDataSwapper
*ds
,
774 const void *inData
, int32_t length
, void *outData
,
775 UErrorCode
*pErrorCode
) {
776 const UDataInfo
*pInfo
;
779 const uint8_t *inBytes
;
782 const int32_t *inIndexes
;
785 int32_t i
, offset
, count
, size
;
787 /* udata_swapDataHeader checks the arguments */
788 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
789 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
793 /* check data format and format version */
794 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
796 pInfo
->dataFormat
[0]==0x53 && /* dataFormat="SPRP" */
797 pInfo
->dataFormat
[1]==0x50 &&
798 pInfo
->dataFormat
[2]==0x52 &&
799 pInfo
->dataFormat
[3]==0x50 &&
800 pInfo
->formatVersion
[0]==3
802 udata_printError(ds
, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
803 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
804 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
805 pInfo
->formatVersion
[0]);
806 *pErrorCode
=U_UNSUPPORTED_ERROR
;
810 inBytes
=(const uint8_t *)inData
+headerSize
;
811 outBytes
=(uint8_t *)outData
+headerSize
;
813 inIndexes
=(const int32_t *)inBytes
;
818 udata_printError(ds
, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
820 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
825 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
826 for(i
=0; i
<16; ++i
) {
827 indexes
[i
]=udata_readInt32(ds
, inIndexes
[i
]);
830 /* calculate the total length of the data */
832 16*4+ /* size of indexes[] */
833 indexes
[_SPREP_INDEX_TRIE_SIZE
]+
834 indexes
[_SPREP_INDEX_MAPPING_DATA_SIZE
];
838 udata_printError(ds
, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
840 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
844 /* copy the data for inaccessible bytes */
845 if(inBytes
!=outBytes
) {
846 uprv_memcpy(outBytes
, inBytes
, size
);
851 /* swap the int32_t indexes[] */
853 ds
->swapArray32(ds
, inBytes
, count
, outBytes
, pErrorCode
);
857 count
=indexes
[_SPREP_INDEX_TRIE_SIZE
];
858 utrie_swap(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
861 /* swap the uint16_t mappingTable[] */
862 count
=indexes
[_SPREP_INDEX_MAPPING_DATA_SIZE
];
863 ds
->swapArray16(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
867 return headerSize
+size
;
870 #endif /* #if !UCONFIG_NO_IDNA */