1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2003-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: usprep.cpp
12 * tab size: 8 (not used)
15 * created on: 2003jul2
16 * created by: Ram Viswanadha
19 #include "unicode/utypes.h"
23 #include "unicode/usprep.h"
25 #include "unicode/normalizer2.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "unicode/uversion.h"
37 #include "ubidi_props.h"
/*
 * Static cache for already opened StringPrep profiles.
 * The table is created by createCache(); usprep_getProfile() looks up and
 * inserts entries with usprepMutex held.
 */
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
/* one-time initialization guard used by initCache(); reset by usprep_cleanup() */
static icu::UInitOnce gSharedDataInitOnce;

/* guards the profile cache, profile reference counts, and installation of loaded data */
static UMutex usprepMutex = U_MUTEX_INITIALIZER;

/* format version of spp file */
//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };

/*
 * the Unicode version of the sprep data
 * Copied from the data header by isSPrepAcceptable() and compared against the
 * normalization data's Unicode version in loadData().
 * NOTE(review): this is one file-level value, and it is written while the data
 * is opened outside the mutex block -- confirm this is safe when several
 * profiles are loaded concurrently.
 */
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
/*
 * Profile names must be aligned to UStringPrepProfileType:
 * usprep_openByType() uses the enum value as an index into this table.
 * Several profile types map to the same "rfc3491" name.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
76 static UBool U_CALLCONV
77 isSPrepAcceptable(void * /* context */,
78 const char * /* type */,
79 const char * /* name */,
80 const UDataInfo
*pInfo
) {
83 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
84 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
85 pInfo
->dataFormat
[0]==0x53 && /* dataFormat="SPRP" */
86 pInfo
->dataFormat
[1]==0x50 &&
87 pInfo
->dataFormat
[2]==0x52 &&
88 pInfo
->dataFormat
[3]==0x50 &&
89 pInfo
->formatVersion
[0]==3 &&
90 pInfo
->formatVersion
[2]==UTRIE_SHIFT
&&
91 pInfo
->formatVersion
[3]==UTRIE_INDEX_SHIFT
93 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
94 uprv_memcpy(dataVersion
, pInfo
->dataVersion
, 4);
101 static int32_t U_CALLCONV
102 getSPrepFoldingOffset(uint32_t data
) {
104 return (int32_t)data
;
108 /* hashes an entry */
109 static int32_t U_CALLCONV
110 hashEntry(const UHashTok parm
) {
111 UStringPrepKey
*b
= (UStringPrepKey
*)parm
.pointer
;
112 UHashTok namekey
, pathkey
;
113 namekey
.pointer
= b
->name
;
114 pathkey
.pointer
= b
->path
;
115 return uhash_hashChars(namekey
)+37*uhash_hashChars(pathkey
);
118 /* compares two entries */
119 static UBool U_CALLCONV
120 compareEntries(const UHashTok p1
, const UHashTok p2
) {
121 UStringPrepKey
*b1
= (UStringPrepKey
*)p1
.pointer
;
122 UStringPrepKey
*b2
= (UStringPrepKey
*)p2
.pointer
;
123 UHashTok name1
, name2
, path1
, path2
;
124 name1
.pointer
= b1
->name
;
125 name2
.pointer
= b2
->name
;
126 path1
.pointer
= b1
->path
;
127 path2
.pointer
= b2
->path
;
128 return ((UBool
)(uhash_compareChars(name1
, name2
) &
129 uhash_compareChars(path1
, path2
)));
133 usprep_unload(UStringPrepProfile
* data
){
134 udata_close(data
->sprepData
);
138 usprep_internal_flushCache(UBool noRefCount
){
139 UStringPrepProfile
*profile
= NULL
;
140 UStringPrepKey
*key
= NULL
;
141 int32_t pos
= UHASH_FIRST
;
142 int32_t deletedNum
= 0;
143 const UHashElement
*e
;
146 * if shared data hasn't even been lazy evaluated yet
149 umtx_lock(&usprepMutex
);
150 if (SHARED_DATA_HASHTABLE
== NULL
) {
151 umtx_unlock(&usprepMutex
);
155 /*creates an enumeration to iterate through every element in the table */
156 while ((e
= uhash_nextElement(SHARED_DATA_HASHTABLE
, &pos
)) != NULL
)
158 profile
= (UStringPrepProfile
*) e
->value
.pointer
;
159 key
= (UStringPrepKey
*) e
->key
.pointer
;
161 if ((noRefCount
== FALSE
&& profile
->refCount
== 0) ||
164 uhash_removeElement(SHARED_DATA_HASHTABLE
, e
);
166 /* unload the data */
167 usprep_unload(profile
);
169 if(key
->name
!= NULL
) {
170 uprv_free(key
->name
);
173 if(key
->path
!= NULL
) {
174 uprv_free(key
->path
);
182 umtx_unlock(&usprepMutex
);
187 /* Works just like ucnv_flushCache()
190 return usprep_internal_flushCache(FALSE);
194 static UBool U_CALLCONV
usprep_cleanup(void){
195 if (SHARED_DATA_HASHTABLE
!= NULL
) {
196 usprep_internal_flushCache(TRUE
);
197 if (SHARED_DATA_HASHTABLE
!= NULL
&& uhash_count(SHARED_DATA_HASHTABLE
) == 0) {
198 uhash_close(SHARED_DATA_HASHTABLE
);
199 SHARED_DATA_HASHTABLE
= NULL
;
202 gSharedDataInitOnce
.reset();
203 return (SHARED_DATA_HASHTABLE
== NULL
);
208 /** Initializes the cache for resources */
209 static void U_CALLCONV
210 createCache(UErrorCode
&status
) {
211 SHARED_DATA_HASHTABLE
= uhash_open(hashEntry
, compareEntries
, NULL
, &status
);
212 if (U_FAILURE(status
)) {
213 SHARED_DATA_HASHTABLE
= NULL
;
215 ucln_common_registerCleanup(UCLN_COMMON_USPREP
, usprep_cleanup
);
219 initCache(UErrorCode
*status
) {
220 umtx_initOnce(gSharedDataInitOnce
, &createCache
, *status
);
223 static UBool U_CALLCONV
224 loadData(UStringPrepProfile
* profile
,
228 UErrorCode
* errorCode
) {
229 /* load Unicode SPREP data from file */
230 UTrie _sprepTrie
={ 0,0,0,0,0,0,0 };
231 UDataMemory
*dataMemory
;
232 const int32_t *p
=NULL
;
234 UVersionInfo normUnicodeVersion
;
235 int32_t normUniVer
, sprepUniVer
, normCorrVer
;
237 if(errorCode
==NULL
|| U_FAILURE(*errorCode
)) {
241 /* open the data outside the mutex block */
242 //TODO: change the path
243 dataMemory
=udata_openChoice(path
, type
, name
, isSPrepAcceptable
, NULL
, errorCode
);
244 if(U_FAILURE(*errorCode
)) {
248 p
=(const int32_t *)udata_getMemory(dataMemory
);
249 pb
=(const uint8_t *)(p
+_SPREP_INDEX_TOP
);
250 utrie_unserialize(&_sprepTrie
, pb
, p
[_SPREP_INDEX_TRIE_SIZE
], errorCode
);
251 _sprepTrie
.getFoldingOffset
=getSPrepFoldingOffset
;
254 if(U_FAILURE(*errorCode
)) {
255 udata_close(dataMemory
);
259 /* in the mutex block, set the data for this process */
260 umtx_lock(&usprepMutex
);
261 if(profile
->sprepData
==NULL
) {
262 profile
->sprepData
=dataMemory
;
264 uprv_memcpy(&profile
->indexes
, p
, sizeof(profile
->indexes
));
265 uprv_memcpy(&profile
->sprepTrie
, &_sprepTrie
, sizeof(UTrie
));
267 p
=(const int32_t *)udata_getMemory(profile
->sprepData
);
269 umtx_unlock(&usprepMutex
);
270 /* initialize some variables */
271 profile
->mappingData
=(uint16_t *)((uint8_t *)(p
+_SPREP_INDEX_TOP
)+profile
->indexes
[_SPREP_INDEX_TRIE_SIZE
]);
273 u_getUnicodeVersion(normUnicodeVersion
);
274 normUniVer
= (normUnicodeVersion
[0] << 24) + (normUnicodeVersion
[1] << 16) +
275 (normUnicodeVersion
[2] << 8 ) + (normUnicodeVersion
[3]);
276 sprepUniVer
= (dataVersion
[0] << 24) + (dataVersion
[1] << 16) +
277 (dataVersion
[2] << 8 ) + (dataVersion
[3]);
278 normCorrVer
= profile
->indexes
[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION
];
280 if(U_FAILURE(*errorCode
)){
281 udata_close(dataMemory
);
284 if( normUniVer
< sprepUniVer
&& /* the Unicode version of SPREP file must be less than the Unicode Version of the normalization data */
285 normUniVer
< normCorrVer
&& /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Version of the normalization data */
286 ((profile
->indexes
[_SPREP_OPTIONS
] & _SPREP_NORMALIZATION_ON
) > 0) /* normalization turned on*/
288 *errorCode
= U_INVALID_FORMAT_ERROR
;
289 udata_close(dataMemory
);
292 profile
->isDataLoaded
= TRUE
;
294 /* if a different thread set it first, then close the extra data */
295 if(dataMemory
!=NULL
) {
296 udata_close(dataMemory
); /* NULL if it was set correctly */
300 return profile
->isDataLoaded
;
303 static UStringPrepProfile
*
304 usprep_getProfile(const char* path
,
308 UStringPrepProfile
* profile
= NULL
;
312 if(U_FAILURE(*status
)){
316 UStringPrepKey stackKey
;
318 * const is cast away to save malloc, strcpy and free calls
319 * we use the passed in pointers for fetching the data from the
320 * hash table which is safe
322 stackKey
.name
= (char*) name
;
323 stackKey
.path
= (char*) path
;
325 /* fetch the data from the cache */
326 umtx_lock(&usprepMutex
);
327 profile
= (UStringPrepProfile
*) (uhash_get(SHARED_DATA_HASHTABLE
,&stackKey
));
328 if(profile
!= NULL
) {
331 umtx_unlock(&usprepMutex
);
333 if(profile
== NULL
) {
334 /* else load the data and put the data in the cache */
335 LocalMemory
<UStringPrepProfile
> newProfile
;
336 if(newProfile
.allocateInsteadAndReset() == NULL
) {
337 *status
= U_MEMORY_ALLOCATION_ERROR
;
342 if(!loadData(newProfile
.getAlias(), path
, name
, _SPREP_DATA_TYPE
, status
) || U_FAILURE(*status
) ){
346 /* get the options */
347 newProfile
->doNFKC
= (UBool
)((newProfile
->indexes
[_SPREP_OPTIONS
] & _SPREP_NORMALIZATION_ON
) > 0);
348 newProfile
->checkBiDi
= (UBool
)((newProfile
->indexes
[_SPREP_OPTIONS
] & _SPREP_CHECK_BIDI_ON
) > 0);
350 if(newProfile
->checkBiDi
) {
351 newProfile
->bdp
= ubidi_getSingleton();
354 LocalMemory
<UStringPrepKey
> key
;
355 LocalMemory
<char> keyName
;
356 LocalMemory
<char> keyPath
;
357 if( key
.allocateInsteadAndReset() == NULL
||
358 keyName
.allocateInsteadAndCopy(uprv_strlen(name
)+1) == NULL
||
360 keyPath
.allocateInsteadAndCopy(uprv_strlen(path
)+1) == NULL
)
362 *status
= U_MEMORY_ALLOCATION_ERROR
;
363 usprep_unload(newProfile
.getAlias());
367 umtx_lock(&usprepMutex
);
368 // If another thread already inserted the same key/value, refcount and cleanup our thread data
369 profile
= (UStringPrepProfile
*) (uhash_get(SHARED_DATA_HASHTABLE
,&stackKey
));
370 if(profile
!= NULL
) {
372 usprep_unload(newProfile
.getAlias());
375 /* initialize the key members */
376 key
->name
= keyName
.orphan();
377 uprv_strcpy(key
->name
, name
);
379 key
->path
= keyPath
.orphan();
380 uprv_strcpy(key
->path
, path
);
382 profile
= newProfile
.orphan();
384 /* add the data object to the cache */
385 profile
->refCount
= 1;
386 uhash_put(SHARED_DATA_HASHTABLE
, key
.orphan(), profile
, status
);
388 umtx_unlock(&usprepMutex
);
394 U_CAPI UStringPrepProfile
* U_EXPORT2
395 usprep_open(const char* path
,
399 if(status
== NULL
|| U_FAILURE(*status
)){
403 /* initialize the profile struct members */
404 return usprep_getProfile(path
,name
,status
);
407 U_CAPI UStringPrepProfile
* U_EXPORT2
408 usprep_openByType(UStringPrepProfileType type
,
409 UErrorCode
* status
) {
410 if(status
== NULL
|| U_FAILURE(*status
)){
413 int32_t index
= (int32_t)type
;
414 if (index
< 0 || index
>= UPRV_LENGTHOF(PROFILE_NAMES
)) {
415 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
418 return usprep_open(NULL
, PROFILE_NAMES
[index
], status
);
421 U_CAPI
void U_EXPORT2
422 usprep_close(UStringPrepProfile
* profile
){
427 umtx_lock(&usprepMutex
);
428 /* decrement the ref count*/
429 if(profile
->refCount
> 0){
432 umtx_unlock(&usprepMutex
);
437 uprv_syntaxError(const UChar
* rules
,
440 UParseError
* parseError
){
441 if(parseError
== NULL
){
444 parseError
->offset
= pos
;
445 parseError
->line
= 0 ; // we are not using line numbers
448 int32_t start
= (pos
< U_PARSE_CONTEXT_LEN
)? 0 : (pos
- (U_PARSE_CONTEXT_LEN
-1));
451 u_memcpy(parseError
->preContext
,rules
+start
,limit
-start
);
452 //null terminate the buffer
453 parseError
->preContext
[limit
-start
] = 0;
455 // for post-context; include error rules[pos]
457 limit
= start
+ (U_PARSE_CONTEXT_LEN
-1);
458 if (limit
> rulesLen
) {
461 if (start
< rulesLen
) {
462 u_memcpy(parseError
->postContext
,rules
+start
,limit
-start
);
464 //null terminate the buffer
465 parseError
->postContext
[limit
-start
]= 0;
469 static inline UStringPrepType
470 getValues(uint16_t trieWord
, int16_t& value
, UBool
& isIndex
){
472 UStringPrepType type
;
475 * Initial value stored in the mapping table
476 * just return USPREP_TYPE_LIMIT .. so that
477 * the source codepoint is copied to the destination
479 type
= USPREP_TYPE_LIMIT
;
482 }else if(trieWord
>= _SPREP_TYPE_THRESHOLD
){
483 type
= (UStringPrepType
) (trieWord
- _SPREP_TYPE_THRESHOLD
);
489 /* ascertain if the value is index or delta */
492 value
= trieWord
>> 2; //mask off the lower 2 bits and shift
495 value
= (int16_t)trieWord
;
496 value
= (value
>> 2);
499 if((trieWord
>>2) == _SPREP_MAX_INDEX_VALUE
){
500 type
= USPREP_DELETE
;
508 // TODO: change to writing to UnicodeString not UChar *
510 usprep_map( const UStringPrepProfile
* profile
,
511 const UChar
* src
, int32_t srcLength
,
512 UChar
* dest
, int32_t destCapacity
,
514 UParseError
* parseError
,
515 UErrorCode
* status
){
520 UBool allowUnassigned
= (UBool
) ((options
& USPREP_ALLOW_UNASSIGNED
)>0);
521 UStringPrepType type
;
524 const int32_t* indexes
= profile
->indexes
;
526 // no error checking the caller check for error and arguments
527 // no string length check the caller finds out the string length
529 for(srcIndex
=0;srcIndex
<srcLength
;){
532 U16_NEXT(src
,srcIndex
,srcLength
,ch
);
536 UTRIE_GET16(&profile
->sprepTrie
,ch
,result
);
538 type
= getValues(result
, value
, isIndex
);
540 // check if the source codepoint is unassigned
541 if(type
== USPREP_UNASSIGNED
&& allowUnassigned
== FALSE
){
543 uprv_syntaxError(src
,srcIndex
-U16_LENGTH(ch
), srcLength
,parseError
);
544 *status
= U_STRINGPREP_UNASSIGNED_ERROR
;
547 }else if(type
== USPREP_MAP
){
549 int32_t index
, length
;
553 if(index
>= indexes
[_SPREP_ONE_UCHAR_MAPPING_INDEX_START
] &&
554 index
< indexes
[_SPREP_TWO_UCHARS_MAPPING_INDEX_START
]){
556 }else if(index
>= indexes
[_SPREP_TWO_UCHARS_MAPPING_INDEX_START
] &&
557 index
< indexes
[_SPREP_THREE_UCHARS_MAPPING_INDEX_START
]){
559 }else if(index
>= indexes
[_SPREP_THREE_UCHARS_MAPPING_INDEX_START
] &&
560 index
< indexes
[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START
]){
563 length
= profile
->mappingData
[index
++];
567 /* copy mapping to destination */
568 for(int32_t i
=0; i
< length
; i
++){
569 if(destIndex
< destCapacity
){
570 dest
[destIndex
] = profile
->mappingData
[index
+i
];
572 destIndex
++; /* for pre-flighting */
576 // subtract the delta to arrive at the code point
580 }else if(type
==USPREP_DELETE
){
581 // just consume the code point and continue
584 //copy the code point into destination
586 if(destIndex
< destCapacity
){
587 dest
[destIndex
] = (UChar
)ch
;
591 if(destIndex
+1 < destCapacity
){
592 dest
[destIndex
] = U16_LEAD(ch
);
593 dest
[destIndex
+1] = U16_TRAIL(ch
);
600 return u_terminateUChars(dest
, destCapacity
, destIndex
, status
);
604 1) Map -- For each character in the input, check if it has a mapping
605 and, if so, replace it with its mapping.
607 2) Normalize -- Possibly normalize the result of step 1 using Unicode
610 3) Prohibit -- Check for any characters that are not allowed in the
611 output. If any are found, return an error.
613 4) Check bidi -- Possibly check for right-to-left characters, and if
614 any are found, make sure that the whole string satisfies the
615 requirements for bidirectional strings. If the string does not
616 satisfy the requirements for bidirectional strings, return an
618 [Unicode3.2] defines several bidirectional categories; each character
619 has one bidirectional category assigned to it. For the purposes of
620 the requirements below, an "RandALCat character" is a character that
621 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
622 is a character that has Unicode bidirectional category "L". Note
625 that there are many characters which fall in neither of the above
626 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
627 this because they have bidirectional category "EN".
629 In any profile that specifies bidirectional character handling, all
630 three of the following requirements MUST be met:
632 1) The characters in section 5.8 MUST be prohibited.
634 2) If a string contains any RandALCat character, the string MUST NOT
635 contain any LCat character.
637 3) If a string contains any RandALCat character, a RandALCat
638 character MUST be the first character of the string, and a
639 RandALCat character MUST be the last character of the string.
641 U_CAPI
int32_t U_EXPORT2
642 usprep_prepare( const UStringPrepProfile
* profile
,
643 const UChar
* src
, int32_t srcLength
,
644 UChar
* dest
, int32_t destCapacity
,
646 UParseError
* parseError
,
647 UErrorCode
* status
){
649 // check error status
650 if(U_FAILURE(*status
)){
656 (src
==NULL
? srcLength
!=0 : srcLength
<-1) ||
657 (dest
==NULL
? destCapacity
!=0 : destCapacity
<0)) {
658 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
662 //get the string length
664 srcLength
= u_strlen(src
);
668 UChar
*b1
= s1
.getBuffer(srcLength
);
670 *status
= U_MEMORY_ALLOCATION_ERROR
;
673 int32_t b1Len
= usprep_map(profile
, src
, srcLength
,
674 b1
, s1
.getCapacity(), options
, parseError
, status
);
675 s1
.releaseBuffer(U_SUCCESS(*status
) ? b1Len
: 0);
677 if(*status
== U_BUFFER_OVERFLOW_ERROR
){
678 // redo processing of string
679 /* we do not have enough room so grow the buffer*/
680 b1
= s1
.getBuffer(b1Len
);
682 *status
= U_MEMORY_ALLOCATION_ERROR
;
686 *status
= U_ZERO_ERROR
; // reset error
687 b1Len
= usprep_map(profile
, src
, srcLength
,
688 b1
, s1
.getCapacity(), options
, parseError
, status
);
689 s1
.releaseBuffer(U_SUCCESS(*status
) ? b1Len
: 0);
691 if(U_FAILURE(*status
)){
698 const Normalizer2
*n2
= Normalizer2::getNFKCInstance(*status
);
699 FilteredNormalizer2
fn2(*n2
, *uniset_getUnicode32Instance(*status
));
700 if(U_FAILURE(*status
)){
703 fn2
.normalize(s1
, s2
, *status
);
707 if(U_FAILURE(*status
)){
711 // Prohibit and checkBiDi in one pass
712 const UChar
*b2
= s2
.getBuffer();
713 int32_t b2Len
= s2
.length();
714 UCharDirection direction
=U_CHAR_DIRECTION_COUNT
, firstCharDir
=U_CHAR_DIRECTION_COUNT
;
715 UBool leftToRight
=FALSE
, rightToLeft
=FALSE
;
716 int32_t rtlPos
=-1, ltrPos
=-1;
718 for(int32_t b2Index
=0; b2Index
<b2Len
;){
720 U16_NEXT(b2
, b2Index
, b2Len
, ch
);
723 UTRIE_GET16(&profile
->sprepTrie
,ch
,result
);
727 UStringPrepType type
= getValues(result
, value
, isIndex
);
729 if( type
== USPREP_PROHIBITED
||
730 ((result
< _SPREP_TYPE_THRESHOLD
) && (result
& 0x01) /* the lowest bit, when set, says the code point is prohibited */)
732 *status
= U_STRINGPREP_PROHIBITED_ERROR
;
733 uprv_syntaxError(b1
, b2Index
-U16_LENGTH(ch
), b2Len
, parseError
);
737 if(profile
->checkBiDi
) {
738 direction
= ubidi_getClass(profile
->bdp
, ch
);
739 if(firstCharDir
== U_CHAR_DIRECTION_COUNT
){
740 firstCharDir
= direction
;
742 if(direction
== U_LEFT_TO_RIGHT
){
746 if(direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
){
752 if(profile
->checkBiDi
== TRUE
){
754 if( leftToRight
== TRUE
&& rightToLeft
== TRUE
){
755 *status
= U_STRINGPREP_CHECK_BIDI_ERROR
;
756 uprv_syntaxError(b2
,(rtlPos
>ltrPos
) ? rtlPos
: ltrPos
, b2Len
, parseError
);
761 if( rightToLeft
== TRUE
&&
762 !((firstCharDir
== U_RIGHT_TO_LEFT
|| firstCharDir
== U_RIGHT_TO_LEFT_ARABIC
) &&
763 (direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
))
765 *status
= U_STRINGPREP_CHECK_BIDI_ERROR
;
766 uprv_syntaxError(b2
, rtlPos
, b2Len
, parseError
);
770 return s2
.extract(dest
, destCapacity
, *status
);
774 /* data swapping ------------------------------------------------------------ */
776 U_CAPI
int32_t U_EXPORT2
777 usprep_swap(const UDataSwapper
*ds
,
778 const void *inData
, int32_t length
, void *outData
,
779 UErrorCode
*pErrorCode
) {
780 const UDataInfo
*pInfo
;
783 const uint8_t *inBytes
;
786 const int32_t *inIndexes
;
789 int32_t i
, offset
, count
, size
;
791 /* udata_swapDataHeader checks the arguments */
792 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
793 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
797 /* check data format and format version */
798 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
800 pInfo
->dataFormat
[0]==0x53 && /* dataFormat="SPRP" */
801 pInfo
->dataFormat
[1]==0x50 &&
802 pInfo
->dataFormat
[2]==0x52 &&
803 pInfo
->dataFormat
[3]==0x50 &&
804 pInfo
->formatVersion
[0]==3
806 udata_printError(ds
, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
807 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
808 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
809 pInfo
->formatVersion
[0]);
810 *pErrorCode
=U_UNSUPPORTED_ERROR
;
814 inBytes
=(const uint8_t *)inData
+headerSize
;
815 outBytes
=(uint8_t *)outData
+headerSize
;
817 inIndexes
=(const int32_t *)inBytes
;
822 udata_printError(ds
, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
824 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
829 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
830 for(i
=0; i
<16; ++i
) {
831 indexes
[i
]=udata_readInt32(ds
, inIndexes
[i
]);
834 /* calculate the total length of the data */
836 16*4+ /* size of indexes[] */
837 indexes
[_SPREP_INDEX_TRIE_SIZE
]+
838 indexes
[_SPREP_INDEX_MAPPING_DATA_SIZE
];
842 udata_printError(ds
, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
844 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
848 /* copy the data for inaccessible bytes */
849 if(inBytes
!=outBytes
) {
850 uprv_memcpy(outBytes
, inBytes
, size
);
855 /* swap the int32_t indexes[] */
857 ds
->swapArray32(ds
, inBytes
, count
, outBytes
, pErrorCode
);
861 count
=indexes
[_SPREP_INDEX_TRIE_SIZE
];
862 utrie_swap(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
865 /* swap the uint16_t mappingTable[] */
866 count
=indexes
[_SPREP_INDEX_MAPPING_DATA_SIZE
];
867 ds
->swapArray16(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
871 return headerSize
+size
;
874 #endif /* #if !UCONFIG_NO_IDNA */