1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2003-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: usprep.cpp
12 * tab size: 8 (not used)
15 * created on: 2003jul2
16 * created by: Ram Viswanadha
19 #include "unicode/utypes.h"
23 #include "unicode/usprep.h"
25 #include "unicode/normalizer2.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "unicode/uversion.h"
37 #include "ubidi_props.h"
45 Static cache for already opened StringPrep profiles
47 static UHashtable
*SHARED_DATA_HASHTABLE
= NULL
;
48 static icu::UInitOnce gSharedDataInitOnce
= U_INITONCE_INITIALIZER
;
50 static UMutex usprepMutex
;
51 /* format version of spp file */
52 //static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
54 /* the Unicode version of the sprep data */
55 static UVersionInfo dataVersion
={ 0, 0, 0, 0 };
/*
 * Data file base names, indexed by UStringPrepProfileType.
 * The order of the entries must stay aligned with the enum: usprep_openByType()
 * uses the enum value directly as the index into this table.
 * Several NFSv4 profiles intentionally share the "rfc3491" data file.
 */
static const char * const PROFILE_NAMES[] = {
    "rfc3491",      /* USPREP_RFC3491_NAMEPREP */
    "rfc3530cs",    /* USPREP_RFC3530_NFS4_CS_PREP */
    "rfc3530csci",  /* USPREP_RFC3530_NFS4_CS_PREP_CI */
    "rfc3491",      /* USPREP_RFC3530_NFS4_CIS_PREP */
    "rfc3530mixp",  /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
    "rfc3491",      /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
    "rfc3722",      /* USPREP_RFC3722_ISCSI */
    "rfc3920node",  /* USPREP_RFC3920_NODEPREP */
    "rfc3920res",   /* USPREP_RFC3920_RESOURCEPREP */
    "rfc4011",      /* USPREP_RFC4011_MIB */
    "rfc4013",      /* USPREP_RFC4013_SASLPREP */
    "rfc4505",      /* USPREP_RFC4505_TRACE */
    "rfc4518",      /* USPREP_RFC4518_LDAP */
    "rfc4518ci",    /* USPREP_RFC4518_LDAP_CI */
};
/*
 * Acceptance callback that loadData() passes to udata_openChoice().
 * The context, type and name arguments are unused. The visible checks
 * compare the header's endianness and charset family with this platform,
 * require the data format bytes "SPRP", format version 3, and trie shift
 * values equal to UTRIE_SHIFT and UTRIE_INDEX_SHIFT.
 * pInfo->dataVersion (4 bytes) is copied into the file-level dataVersion.
 * NOTE(review): presumably the copy happens, and TRUE is returned, only when
 * every check passes - confirm.
 */
75 static UBool U_CALLCONV
76 isSPrepAcceptable(void * /* context */,
77 const char * /* type */,
78 const char * /* name */,
79 const UDataInfo
*pInfo
) {
82 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
83 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
84 pInfo
->dataFormat
[0]==0x53 && /* dataFormat="SPRP" */
85 pInfo
->dataFormat
[1]==0x50 &&
86 pInfo
->dataFormat
[2]==0x52 &&
87 pInfo
->dataFormat
[3]==0x50 &&
88 pInfo
->formatVersion
[0]==3 &&
89 pInfo
->formatVersion
[2]==UTRIE_SHIFT
&&
90 pInfo
->formatVersion
[3]==UTRIE_INDEX_SHIFT
92 //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
/* remember the Unicode version of the data; loadData() compares it later */
93 uprv_memcpy(dataVersion
, pInfo
->dataVersion
, 4);
100 static int32_t U_CALLCONV
101 getSPrepFoldingOffset(uint32_t data
) {
103 return (int32_t)data
;
107 /* hashes an entry */
108 static int32_t U_CALLCONV
109 hashEntry(const UHashTok parm
) {
110 UStringPrepKey
*b
= (UStringPrepKey
*)parm
.pointer
;
111 UHashTok namekey
, pathkey
;
112 namekey
.pointer
= b
->name
;
113 pathkey
.pointer
= b
->path
;
114 uint32_t unsignedHash
= static_cast<uint32_t>(uhash_hashChars(namekey
)) +
115 37u * static_cast<uint32_t>(uhash_hashChars(pathkey
));
116 return static_cast<int32_t>(unsignedHash
);
119 /* compares two entries */
120 static UBool U_CALLCONV
121 compareEntries(const UHashTok p1
, const UHashTok p2
) {
122 UStringPrepKey
*b1
= (UStringPrepKey
*)p1
.pointer
;
123 UStringPrepKey
*b2
= (UStringPrepKey
*)p2
.pointer
;
124 UHashTok name1
, name2
, path1
, path2
;
125 name1
.pointer
= b1
->name
;
126 name2
.pointer
= b2
->name
;
127 path1
.pointer
= b1
->path
;
128 path2
.pointer
= b2
->path
;
129 return ((UBool
)(uhash_compareChars(name1
, name2
) &
130 uhash_compareChars(path1
, path2
)));
134 usprep_unload(UStringPrepProfile
* data
){
135 udata_close(data
->sprepData
);
/*
 * Walks the profile cache under usprepMutex and removes entries from it.
 * With noRefCount == FALSE only profiles whose refCount is 0 qualify;
 * usprep_cleanup() calls this with TRUE.
 * For every removed entry the profile data is unloaded and the key's name
 * and path strings are freed.
 * NOTE(review): the return type and the returned value are not visible on
 * these lines; deletedNum is presumably the count that is returned - confirm.
 */
139 usprep_internal_flushCache(UBool noRefCount
){
140 UStringPrepProfile
*profile
= NULL
;
141 UStringPrepKey
*key
= NULL
;
142 int32_t pos
= UHASH_FIRST
;
143 int32_t deletedNum
= 0;
144 const UHashElement
*e
;
147 * if shared data hasn't even been lazy evaluated yet
150 umtx_lock(&usprepMutex
);
151 if (SHARED_DATA_HASHTABLE
== NULL
) {
/* nothing was ever cached: release the mutex before leaving */
152 umtx_unlock(&usprepMutex
);
156 /*creates an enumeration to iterate through every element in the table */
157 while ((e
= uhash_nextElement(SHARED_DATA_HASHTABLE
, &pos
)) != NULL
)
159 profile
= (UStringPrepProfile
*) e
->value
.pointer
;
160 key
= (UStringPrepKey
*) e
->key
.pointer
;
162 if ((noRefCount
== FALSE
&& profile
->refCount
== 0) ||
165 uhash_removeElement(SHARED_DATA_HASHTABLE
, e
);
167 /* unload the data */
168 usprep_unload(profile
);
/* the key owns heap copies of name and path (see usprep_getProfile) */
170 if(key
->name
!= NULL
) {
171 uprv_free(key
->name
);
174 if(key
->path
!= NULL
) {
175 uprv_free(key
->path
);
183 umtx_unlock(&usprepMutex
);
188 /* Works just like ucnv_flushCache()
191 return usprep_internal_flushCache(FALSE);
195 static UBool U_CALLCONV
usprep_cleanup(void){
196 if (SHARED_DATA_HASHTABLE
!= NULL
) {
197 usprep_internal_flushCache(TRUE
);
198 if (SHARED_DATA_HASHTABLE
!= NULL
&& uhash_count(SHARED_DATA_HASHTABLE
) == 0) {
199 uhash_close(SHARED_DATA_HASHTABLE
);
200 SHARED_DATA_HASHTABLE
= NULL
;
203 gSharedDataInitOnce
.reset();
204 return (SHARED_DATA_HASHTABLE
== NULL
);
209 /** Initializes the cache for resources */
210 static void U_CALLCONV
211 createCache(UErrorCode
&status
) {
212 SHARED_DATA_HASHTABLE
= uhash_open(hashEntry
, compareEntries
, NULL
, &status
);
213 if (U_FAILURE(status
)) {
214 SHARED_DATA_HASHTABLE
= NULL
;
216 ucln_common_registerCleanup(UCLN_COMMON_USPREP
, usprep_cleanup
);
220 initCache(UErrorCode
*status
) {
221 umtx_initOnce(gSharedDataInitOnce
, &createCache
, *status
);
/*
 * Loads the StringPrep data for one profile and stores it in *profile.
 *
 * Steps visible here:
 *  - open the data with udata_openChoice(), validated by isSPrepAcceptable();
 *  - unserialize the trie that follows the _SPREP_INDEX_TOP int32_t indexes;
 *  - under usprepMutex, store the data memory, the indexes and the trie in
 *    the profile if profile->sprepData is still NULL;
 *  - point profile->mappingData just past the trie;
 *  - compare the Unicode versions of the library, the sprep data and the
 *    normalization corrections, and set U_INVALID_FORMAT_ERROR on a mismatch;
 *  - mark the profile as loaded.
 *
 * Returns profile->isDataLoaded. errorCode must be non-NULL and must not
 * already indicate a failure.
 */
224 static UBool U_CALLCONV
225 loadData(UStringPrepProfile
* profile
,
229 UErrorCode
* errorCode
) {
230 /* load Unicode SPREP data from file */
231 UTrie _sprepTrie
={ 0,0,0,0,0,0,0 };
232 UDataMemory
*dataMemory
;
233 const int32_t *p
=NULL
;
235 UVersionInfo normUnicodeVersion
;
236 int32_t normUniVer
, sprepUniVer
, normCorrVer
;
238 if(errorCode
==NULL
|| U_FAILURE(*errorCode
)) {
242 /* open the data outside the mutex block */
243 //TODO: change the path
244 dataMemory
=udata_openChoice(path
, type
, name
, isSPrepAcceptable
, NULL
, errorCode
);
245 if(U_FAILURE(*errorCode
)) {
/* the data starts with _SPREP_INDEX_TOP int32_t indexes, followed by the trie */
249 p
=(const int32_t *)udata_getMemory(dataMemory
);
250 pb
=(const uint8_t *)(p
+_SPREP_INDEX_TOP
);
251 utrie_unserialize(&_sprepTrie
, pb
, p
[_SPREP_INDEX_TRIE_SIZE
], errorCode
);
252 _sprepTrie
.getFoldingOffset
=getSPrepFoldingOffset
;
255 if(U_FAILURE(*errorCode
)) {
256 udata_close(dataMemory
);
260 /* in the mutex block, set the data for this process */
261 umtx_lock(&usprepMutex
);
262 if(profile
->sprepData
==NULL
) {
263 profile
->sprepData
=dataMemory
;
265 uprv_memcpy(&profile
->indexes
, p
, sizeof(profile
->indexes
));
266 uprv_memcpy(&profile
->sprepTrie
, &_sprepTrie
, sizeof(UTrie
));
/* re-read the base pointer from whichever data memory the profile now holds */
268 p
=(const int32_t *)udata_getMemory(profile
->sprepData
);
270 umtx_unlock(&usprepMutex
);
271 /* initialize some variables */
/* the mapping table follows the indexes and the serialized trie */
272 profile
->mappingData
=(uint16_t *)((uint8_t *)(p
+_SPREP_INDEX_TOP
)+profile
->indexes
[_SPREP_INDEX_TRIE_SIZE
]);
/* pack each 4-part version into one int32_t so versions compare with < */
274 u_getUnicodeVersion(normUnicodeVersion
);
275 normUniVer
= (normUnicodeVersion
[0] << 24) + (normUnicodeVersion
[1] << 16) +
276 (normUnicodeVersion
[2] << 8 ) + (normUnicodeVersion
[3]);
277 sprepUniVer
= (dataVersion
[0] << 24) + (dataVersion
[1] << 16) +
278 (dataVersion
[2] << 8 ) + (dataVersion
[3]);
279 normCorrVer
= profile
->indexes
[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION
];
281 if(U_FAILURE(*errorCode
)){
282 udata_close(dataMemory
);
/*
 * The data is rejected only when all three conditions hold: the sprep data
 * is newer than the library's Unicode version, the normalization
 * corrections are newer as well, and the profile asks for normalization.
 */
285 if( normUniVer
< sprepUniVer
&& /* the Unicode version of SPREP file must be less than the Unicode Version of the normalization data */
286 normUniVer
< normCorrVer
&& /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Version of the normalization data */
287 ((profile
->indexes
[_SPREP_OPTIONS
] & _SPREP_NORMALIZATION_ON
) > 0) /* normalization turned on*/
289 *errorCode
= U_INVALID_FORMAT_ERROR
;
290 udata_close(dataMemory
);
293 profile
->isDataLoaded
= TRUE
;
295 /* if a different thread set it first, then close the extra data */
296 if(dataMemory
!=NULL
) {
297 udata_close(dataMemory
); /* NULL if it was set correctly */
301 return profile
->isDataLoaded
;
/*
 * Returns the cached profile for (path, name), loading and caching it on a
 * miss.
 *
 * The lookup uses a stack-allocated key that simply aliases the caller's
 * strings. On a miss the data is loaded outside the mutex into a freshly
 * allocated profile, heap copies of name and path are prepared for the cache
 * key, and the cache is consulted a second time under usprepMutex because
 * another thread may have inserted the same profile in the meantime. A newly
 * inserted profile starts with refCount 1.
 *
 * Errors are reported through *status (U_MEMORY_ALLOCATION_ERROR when one of
 * the allocations fails, or whatever loadData() sets).
 */
304 static UStringPrepProfile
*
305 usprep_getProfile(const char* path
,
309 UStringPrepProfile
* profile
= NULL
;
313 if(U_FAILURE(*status
)){
317 UStringPrepKey stackKey
;
319 * const is cast way to save malloc, strcpy and free calls
320 * we use the passed in pointers for fetching the data from the
321 * hash table which is safe
323 stackKey
.name
= (char*) name
;
324 stackKey
.path
= (char*) path
;
326 /* fetch the data from the cache */
327 umtx_lock(&usprepMutex
);
328 profile
= (UStringPrepProfile
*) (uhash_get(SHARED_DATA_HASHTABLE
,&stackKey
));
329 if(profile
!= NULL
) {
332 umtx_unlock(&usprepMutex
);
334 if(profile
== NULL
) {
335 /* else load the data and put the data in the cache */
336 LocalMemory
<UStringPrepProfile
> newProfile
;
337 if(newProfile
.allocateInsteadAndReset() == NULL
) {
338 *status
= U_MEMORY_ALLOCATION_ERROR
;
343 if(!loadData(newProfile
.getAlias(), path
, name
, _SPREP_DATA_TYPE
, status
) || U_FAILURE(*status
) ){
347 /* get the options */
348 newProfile
->doNFKC
= (UBool
)((newProfile
->indexes
[_SPREP_OPTIONS
] & _SPREP_NORMALIZATION_ON
) > 0);
349 newProfile
->checkBiDi
= (UBool
)((newProfile
->indexes
[_SPREP_OPTIONS
] & _SPREP_CHECK_BIDI_ON
) > 0);
/* allocate the cache key and its string copies before taking the mutex */
351 LocalMemory
<UStringPrepKey
> key
;
352 LocalMemory
<char> keyName
;
353 LocalMemory
<char> keyPath
;
354 if( key
.allocateInsteadAndReset() == NULL
||
355 keyName
.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name
)+1)) == NULL
||
357 keyPath
.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path
)+1)) == NULL
)
359 *status
= U_MEMORY_ALLOCATION_ERROR
;
/* the data loaded above has to be released again on this error path */
360 usprep_unload(newProfile
.getAlias());
364 umtx_lock(&usprepMutex
);
365 // If another thread already inserted the same key/value, refcount and cleanup our thread data
366 profile
= (UStringPrepProfile
*) (uhash_get(SHARED_DATA_HASHTABLE
,&stackKey
));
367 if(profile
!= NULL
) {
369 usprep_unload(newProfile
.getAlias());
372 /* initialize the key members */
/* orphan() hands ownership of each buffer from the LocalMemory to the key */
373 key
->name
= keyName
.orphan();
374 uprv_strcpy(key
->name
, name
);
376 key
->path
= keyPath
.orphan();
377 uprv_strcpy(key
->path
, path
);
379 profile
= newProfile
.orphan();
381 /* add the data object to the cache */
382 profile
->refCount
= 1;
383 uhash_put(SHARED_DATA_HASHTABLE
, key
.orphan(), profile
, status
);
385 umtx_unlock(&usprepMutex
);
/*
 * Public entry point that opens a StringPrep profile.
 * Checks that status is non-NULL and does not already hold a failure, then
 * delegates to usprep_getProfile(path, name, status) and returns its result.
 */
391 U_CAPI UStringPrepProfile
* U_EXPORT2
392 usprep_open(const char* path
,
396 if(status
== NULL
|| U_FAILURE(*status
)){
400 /* initialize the profile struct members */
401 return usprep_getProfile(path
,name
,status
);
404 U_CAPI UStringPrepProfile
* U_EXPORT2
405 usprep_openByType(UStringPrepProfileType type
,
406 UErrorCode
* status
) {
407 if(status
== NULL
|| U_FAILURE(*status
)){
410 int32_t index
= (int32_t)type
;
411 if (index
< 0 || index
>= UPRV_LENGTHOF(PROFILE_NAMES
)) {
412 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
415 return usprep_open(NULL
, PROFILE_NAMES
[index
], status
);
/*
 * Public entry point that releases a profile obtained from usprep_open() or
 * usprep_openByType().
 * The reference count is inspected while usprepMutex is held.
 * NOTE(review): the statement inside the refCount check is not visible on
 * these lines; per the existing comment it presumably decrements
 * profile->refCount - confirm.
 */
418 U_CAPI
void U_EXPORT2
419 usprep_close(UStringPrepProfile
* profile
){
424 umtx_lock(&usprepMutex
);
425 /* decrement the ref count*/
426 if(profile
->refCount
> 0){
429 umtx_unlock(&usprepMutex
);
/*
 * Fills *parseError for an error found at index pos of rules.
 * Stores the offset, sets line to 0 (line numbers are not tracked), and
 * copies up to U_PARSE_CONTEXT_LEN-1 code units of context into preContext
 * and postContext; both buffers are NUL-terminated.
 * A NULL parseError is tolerated (see the check at the top).
 */
434 uprv_syntaxError(const UChar
* rules
,
437 UParseError
* parseError
){
438 if(parseError
== NULL
){
441 parseError
->offset
= pos
;
442 parseError
->line
= 0 ; // we are not using line numbers
/* pre-context: at most U_PARSE_CONTEXT_LEN-1 units that end before pos */
445 int32_t start
= (pos
< U_PARSE_CONTEXT_LEN
)? 0 : (pos
- (U_PARSE_CONTEXT_LEN
-1));
448 u_memcpy(parseError
->preContext
,rules
+start
,limit
-start
);
449 //null terminate the buffer
450 parseError
->preContext
[limit
-start
] = 0;
452 // for post-context; include error rules[pos]
454 limit
= start
+ (U_PARSE_CONTEXT_LEN
-1);
455 if (limit
> rulesLen
) {
/* copy only when there is text left at or after start */
458 if (start
< rulesLen
) {
459 u_memcpy(parseError
->postContext
,rules
+start
,limit
-start
);
461 //null terminate the buffer
462 parseError
->postContext
[limit
-start
]= 0;
/*
 * Decodes one 16-bit word read from the StringPrep trie.
 * Returns the UStringPrepType for the code point and writes the decoded
 * payload to value.
 * Words at or above _SPREP_TYPE_THRESHOLD encode the type directly as
 * (trieWord - _SPREP_TYPE_THRESHOLD); other words carry a payload in the
 * bits above the lowest two.
 * NOTE(review): isIndex presumably reports whether value is an index into
 * the mapping table or a delta; its assignments are not visible on these
 * lines - confirm.
 */
466 static inline UStringPrepType
467 getValues(uint16_t trieWord
, int16_t& value
, UBool
& isIndex
){
469 UStringPrepType type
;
472 * Initial value stored in the mapping table
473 * just return USPREP_TYPE_LIMIT .. so that
474 * the source codepoint is copied to the destination
476 type
= USPREP_TYPE_LIMIT
;
479 }else if(trieWord
>= _SPREP_TYPE_THRESHOLD
){
480 type
= (UStringPrepType
) (trieWord
- _SPREP_TYPE_THRESHOLD
);
486 /* ascertain if the value is index or delta */
489 value
= trieWord
>> 2; //mask off the lower 2 bits and shift
/* go through int16_t first so that the shift below keeps the sign */
492 value
= (int16_t)trieWord
;
493 value
= (value
>> 2);
/* the maximum index value is a marker for "delete this code point" */
496 if((trieWord
>>2) == _SPREP_MAX_INDEX_VALUE
){
497 type
= USPREP_DELETE
;
/*
 * Mapping step of StringPrep: walks src code point by code point, looks each
 * one up in profile->sprepTrie and writes the result to dest.
 *
 *  - unassigned code points fail with U_STRINGPREP_UNASSIGNED_ERROR unless
 *    USPREP_ALLOW_UNASSIGNED is set in options;
 *  - mapped code points are replaced, either from profile->mappingData or by
 *    applying a delta;
 *  - deleted code points are dropped;
 *  - everything else is copied through unchanged.
 *
 * destIndex keeps advancing when dest is full, so the return value of
 * u_terminateUChars() can be used for pre-flighting.
 * The caller validates the arguments and resolves srcLength beforehand.
 */
505 // TODO: change to writing to UnicodeString not UChar *
507 usprep_map( const UStringPrepProfile
* profile
,
508 const UChar
* src
, int32_t srcLength
,
509 UChar
* dest
, int32_t destCapacity
,
511 UParseError
* parseError
,
512 UErrorCode
* status
){
517 UBool allowUnassigned
= (UBool
) ((options
& USPREP_ALLOW_UNASSIGNED
)>0);
518 UStringPrepType type
;
521 const int32_t* indexes
= profile
->indexes
;
523 // no error checking the caller check for error and arguments
524 // no string length check the caller finds out the string length
526 for(srcIndex
=0;srcIndex
<srcLength
;){
529 U16_NEXT(src
,srcIndex
,srcLength
,ch
);
533 UTRIE_GET16(&profile
->sprepTrie
,ch
,result
);
535 type
= getValues(result
, value
, isIndex
);
537 // check if the source codepoint is unassigned
538 if(type
== USPREP_UNASSIGNED
&& allowUnassigned
== FALSE
){
/* U16_NEXT already advanced srcIndex, so step back to the start of ch */
540 uprv_syntaxError(src
,srcIndex
-U16_LENGTH(ch
), srcLength
,parseError
);
541 *status
= U_STRINGPREP_UNASSIGNED_ERROR
;
544 }else if(type
== USPREP_MAP
){
546 int32_t index
, length
;
/* the range that index falls into selects the length of the mapping */
550 if(index
>= indexes
[_SPREP_ONE_UCHAR_MAPPING_INDEX_START
] &&
551 index
< indexes
[_SPREP_TWO_UCHARS_MAPPING_INDEX_START
]){
553 }else if(index
>= indexes
[_SPREP_TWO_UCHARS_MAPPING_INDEX_START
] &&
554 index
< indexes
[_SPREP_THREE_UCHARS_MAPPING_INDEX_START
]){
556 }else if(index
>= indexes
[_SPREP_THREE_UCHARS_MAPPING_INDEX_START
] &&
557 index
< indexes
[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START
]){
/* otherwise the length is stored in the mapping table itself, before the data */
560 length
= profile
->mappingData
[index
++];
564 /* copy mapping to destination */
565 for(int32_t i
=0; i
< length
; i
++){
566 if(destIndex
< destCapacity
){
567 dest
[destIndex
] = profile
->mappingData
[index
+i
];
569 destIndex
++; /* for pre-flighting */
573 // subtract the delta to arrive at the code point
577 }else if(type
==USPREP_DELETE
){
578 // just consume the codepoint and continue
581 //copy the code point into destination
583 if(destIndex
< destCapacity
){
584 dest
[destIndex
] = (UChar
)ch
;
/* a supplementary code point needs room for both surrogates */
588 if(destIndex
+1 < destCapacity
){
589 dest
[destIndex
] = U16_LEAD(ch
);
590 dest
[destIndex
+1] = U16_TRAIL(ch
);
597 return u_terminateUChars(dest
, destCapacity
, destIndex
, status
);
601 1) Map -- For each character in the input, check if it has a mapping
602 and, if so, replace it with its mapping.
604 2) Normalize -- Possibly normalize the result of step 1 using Unicode normalization.
607 3) Prohibit -- Check for any characters that are not allowed in the
608 output. If any are found, return an error.
610 4) Check bidi -- Possibly check for right-to-left characters, and if
611 any are found, make sure that the whole string satisfies the
612 requirements for bidirectional strings. If the string does not
613 satisfy the requirements for bidirectional strings, return an error.
615 [Unicode3.2] defines several bidirectional categories; each character
616 has one bidirectional category assigned to it. For the purposes of
617 the requirements below, an "RandALCat character" is a character that
618 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
619 is a character that has Unicode bidirectional category "L". Note
622 that there are many characters which fall in neither of the above
623 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
624 this because they have bidirectional category "EN".
626 In any profile that specifies bidirectional character handling, all
627 three of the following requirements MUST be met:
629 1) The characters in section 5.8 MUST be prohibited.
631 2) If a string contains any RandALCat character, the string MUST NOT
632 contain any LCat character.
634 3) If a string contains any RandALCat character, a RandALCat
635 character MUST be the first character of the string, and a
636 RandALCat character MUST be the last character of the string.
/*
 * Public entry point that runs the full StringPrep pipeline on src and
 * writes the prepared string to dest.
 *
 * Steps visible here:
 *  1. validate status and the src/srcLength and dest/destCapacity pairs
 *     (U_ILLEGAL_ARGUMENT_ERROR on a bad pair) and resolve the string length
 *     with u_strlen();
 *  2. map: usprep_map() into the buffer of s1, repeated once with a larger
 *     buffer after U_BUFFER_OVERFLOW_ERROR;
 *  3. normalize: s1 goes through an NFKC FilteredNormalizer2 into s2;
 *  4. prohibit + bidi: one pass over s2 that fails with
 *     U_STRINGPREP_PROHIBITED_ERROR or U_STRINGPREP_CHECK_BIDI_ERROR and
 *     records the error position through uprv_syntaxError();
 *  5. extract s2 into dest and return the length from extract().
 */
638 U_CAPI
int32_t U_EXPORT2
639 usprep_prepare( const UStringPrepProfile
* profile
,
640 const UChar
* src
, int32_t srcLength
,
641 UChar
* dest
, int32_t destCapacity
,
643 UParseError
* parseError
,
644 UErrorCode
* status
){
646 // check error status
647 if(U_FAILURE(*status
)){
/* a NULL buffer is accepted only together with a zero length/capacity */
653 (src
==NULL
? srcLength
!=0 : srcLength
<-1) ||
654 (dest
==NULL
? destCapacity
!=0 : destCapacity
<0)) {
655 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
659 //get the string length
661 srcLength
= u_strlen(src
);
665 UChar
*b1
= s1
.getBuffer(srcLength
);
667 *status
= U_MEMORY_ALLOCATION_ERROR
;
670 int32_t b1Len
= usprep_map(profile
, src
, srcLength
,
671 b1
, s1
.getCapacity(), options
, parseError
, status
);
/* on failure the buffer is released with length 0 */
672 s1
.releaseBuffer(U_SUCCESS(*status
) ? b1Len
: 0);
674 if(*status
== U_BUFFER_OVERFLOW_ERROR
){
675 // redo processing of string
676 /* we do not have enough room so grow the buffer*/
/* b1Len is the length that the first usprep_map() call reported */
677 b1
= s1
.getBuffer(b1Len
);
679 *status
= U_MEMORY_ALLOCATION_ERROR
;
683 *status
= U_ZERO_ERROR
; // reset error
684 b1Len
= usprep_map(profile
, src
, srcLength
,
685 b1
, s1
.getCapacity(), options
, parseError
, status
);
686 s1
.releaseBuffer(U_SUCCESS(*status
) ? b1Len
: 0);
688 if(U_FAILURE(*status
)){
/*
 * NOTE(review): *n2 and the result of uniset_getUnicode32Instance() are
 * dereferenced before *status is checked below - confirm that neither call
 * can return NULL when it sets a failure.
 */
695 const Normalizer2
*n2
= Normalizer2::getNFKCInstance(*status
);
696 FilteredNormalizer2
fn2(*n2
, *uniset_getUnicode32Instance(*status
));
697 if(U_FAILURE(*status
)){
700 fn2
.normalize(s1
, s2
, *status
);
704 if(U_FAILURE(*status
)){
708 // Prohibit and checkBiDi in one pass
709 const UChar
*b2
= s2
.getBuffer();
710 int32_t b2Len
= s2
.length();
/* U_CHAR_DIRECTION_COUNT serves as the "not seen yet" value */
711 UCharDirection direction
=U_CHAR_DIRECTION_COUNT
, firstCharDir
=U_CHAR_DIRECTION_COUNT
;
712 UBool leftToRight
=FALSE
, rightToLeft
=FALSE
;
713 int32_t rtlPos
=-1, ltrPos
=-1;
715 for(int32_t b2Index
=0; b2Index
<b2Len
;){
717 U16_NEXT(b2
, b2Index
, b2Len
, ch
);
720 UTRIE_GET16(&profile
->sprepTrie
,ch
,result
);
724 UStringPrepType type
= getValues(result
, value
, isIndex
);
726 if( type
== USPREP_PROHIBITED
||
727 ((result
< _SPREP_TYPE_THRESHOLD
) && (result
& 0x01) /* first bit says the code point is prohibited */)
729 *status
= U_STRINGPREP_PROHIBITED_ERROR
;
730 uprv_syntaxError(b2
, b2Index
-U16_LENGTH(ch
), b2Len
, parseError
);
734 if(profile
->checkBiDi
) {
735 direction
= ubidi_getClass(ch
);
736 if(firstCharDir
== U_CHAR_DIRECTION_COUNT
){
737 firstCharDir
= direction
;
739 if(direction
== U_LEFT_TO_RIGHT
){
743 if(direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
){
749 if(profile
->checkBiDi
== TRUE
){
/* left-to-right and right-to-left characters must not be mixed */
751 if( leftToRight
== TRUE
&& rightToLeft
== TRUE
){
752 *status
= U_STRINGPREP_CHECK_BIDI_ERROR
;
753 uprv_syntaxError(b2
,(rtlPos
>ltrPos
) ? rtlPos
: ltrPos
, b2Len
, parseError
);
/* after the loop, direction holds the class of the last code point examined */
758 if( rightToLeft
== TRUE
&&
759 !((firstCharDir
== U_RIGHT_TO_LEFT
|| firstCharDir
== U_RIGHT_TO_LEFT_ARABIC
) &&
760 (direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
))
762 *status
= U_STRINGPREP_CHECK_BIDI_ERROR
;
763 uprv_syntaxError(b2
, rtlPos
, b2Len
, parseError
);
767 return s2
.extract(dest
, destCapacity
, *status
);
771 /* data swapping ------------------------------------------------------------ */
/*
 * Byte-swaps a StringPrep (.spp) data item from inData to outData.
 *
 * After udata_swapDataHeader() has handled the header, the data format must
 * be "SPRP" with format version 3 (U_UNSUPPORTED_ERROR otherwise). The body
 * consists of 16 int32_t indexes, the trie, and the uint16_t mapping table;
 * their sizes come from the indexes. U_INDEX_OUTOFBOUNDS_ERROR is set when
 * the input is too short.
 *
 * Returns headerSize plus the size of the body computed from the indexes.
 */
773 U_CAPI
int32_t U_EXPORT2
774 usprep_swap(const UDataSwapper
*ds
,
775 const void *inData
, int32_t length
, void *outData
,
776 UErrorCode
*pErrorCode
) {
777 const UDataInfo
*pInfo
;
780 const uint8_t *inBytes
;
783 const int32_t *inIndexes
;
786 int32_t i
, offset
, count
, size
;
788 /* udata_swapDataHeader checks the arguments */
789 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
790 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
794 /* check data format and format version */
/* pInfo is read at byte offset 4 from the start of inData */
795 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
797 pInfo
->dataFormat
[0]==0x53 && /* dataFormat="SPRP" */
798 pInfo
->dataFormat
[1]==0x50 &&
799 pInfo
->dataFormat
[2]==0x52 &&
800 pInfo
->dataFormat
[3]==0x50 &&
801 pInfo
->formatVersion
[0]==3
803 udata_printError(ds
, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
804 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
805 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
806 pInfo
->formatVersion
[0]);
807 *pErrorCode
=U_UNSUPPORTED_ERROR
;
/* the body of the data item starts right after the header */
811 inBytes
=(const uint8_t *)inData
+headerSize
;
812 outBytes
=(uint8_t *)outData
+headerSize
;
814 inIndexes
=(const int32_t *)inBytes
;
819 udata_printError(ds
, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
821 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
826 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
827 for(i
=0; i
<16; ++i
) {
828 indexes
[i
]=udata_readInt32(ds
, inIndexes
[i
]);
831 /* calculate the total length of the data */
833 16*4+ /* size of indexes[] */
834 indexes
[_SPREP_INDEX_TRIE_SIZE
]+
835 indexes
[_SPREP_INDEX_MAPPING_DATA_SIZE
];
839 udata_printError(ds
, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
841 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
845 /* copy the data for inaccessible bytes */
846 if(inBytes
!=outBytes
) {
847 uprv_memcpy(outBytes
, inBytes
, size
);
852 /* swap the int32_t indexes[] */
854 ds
->swapArray32(ds
, inBytes
, count
, outBytes
, pErrorCode
);
/* the trie follows the indexes */
858 count
=indexes
[_SPREP_INDEX_TRIE_SIZE
];
859 utrie_swap(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
862 /* swap the uint16_t mappingTable[] */
863 count
=indexes
[_SPREP_INDEX_MAPPING_DATA_SIZE
];
864 ds
->swapArray16(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
868 return headerSize
+size
;
871 #endif /* #if !UCONFIG_NO_IDNA */