2 *******************************************************************************
4 * Copyright (C) 2003-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: usprep.cpp
10 * tab size: 8 (not used)
13 * created on: 2003jul2
14 * created by: Ram Viswanadha
17 #include "unicode/utypes.h"
21 #include "unicode/usprep.h"
23 #include "unicode/unorm.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uchar.h"
26 #include "unicode/uversion.h"
40 Static cache for already opened StringPrep profiles
42 static UHashtable
*SHARED_DATA_HASHTABLE
= NULL
;
44 static UMTX usprepMutex
= NULL
;
46 /* format version of spp file */
47 static uint8_t formatVersion
[4]={ 0, 0, 0, 0 };
49 /* the Unicode version of the sprep data */
50 static UVersionInfo dataVersion
={ 0, 0, 0, 0 };
52 static UBool U_CALLCONV
53 isSPrepAcceptable(void * /* context */,
54 const char * /* type */,
55 const char * /* name */,
56 const UDataInfo
*pInfo
) {
59 pInfo
->isBigEndian
==U_IS_BIG_ENDIAN
&&
60 pInfo
->charsetFamily
==U_CHARSET_FAMILY
&&
61 pInfo
->dataFormat
[0]==0x53 && /* dataFormat="SPRP" */
62 pInfo
->dataFormat
[1]==0x50 &&
63 pInfo
->dataFormat
[2]==0x52 &&
64 pInfo
->dataFormat
[3]==0x50 &&
65 pInfo
->formatVersion
[0]==3 &&
66 pInfo
->formatVersion
[2]==UTRIE_SHIFT
&&
67 pInfo
->formatVersion
[3]==UTRIE_INDEX_SHIFT
69 uprv_memcpy(formatVersion
, pInfo
->formatVersion
, 4);
70 uprv_memcpy(dataVersion
, pInfo
->dataVersion
, 4);
77 static int32_t U_CALLCONV
78 getSPrepFoldingOffset(uint32_t data
) {
85 static int32_t U_EXPORT2 U_CALLCONV
86 hashEntry(const UHashTok parm
) {
87 UStringPrepKey
*b
= (UStringPrepKey
*)parm
.pointer
;
88 UHashTok namekey
, pathkey
;
89 namekey
.pointer
= b
->name
;
90 pathkey
.pointer
= b
->path
;
91 return uhash_hashChars(namekey
)+37*uhash_hashChars(pathkey
);
94 /* compares two entries */
95 static UBool U_EXPORT2 U_CALLCONV
96 compareEntries(const UHashTok p1
, const UHashTok p2
) {
97 UStringPrepKey
*b1
= (UStringPrepKey
*)p1
.pointer
;
98 UStringPrepKey
*b2
= (UStringPrepKey
*)p2
.pointer
;
99 UHashTok name1
, name2
, path1
, path2
;
100 name1
.pointer
= b1
->name
;
101 name2
.pointer
= b2
->name
;
102 path1
.pointer
= b1
->path
;
103 path2
.pointer
= b2
->path
;
104 return ((UBool
)(uhash_compareChars(name1
, name2
) &
105 uhash_compareChars(path1
, path2
)));
109 usprep_unload(UStringPrepProfile
* data
){
110 udata_close(data
->sprepData
);
114 usprep_internal_flushCache(UBool noRefCount
){
115 UStringPrepProfile
*profile
= NULL
;
116 UStringPrepKey
*key
= NULL
;
118 int32_t deletedNum
= 0;
119 const UHashElement
*e
;
122 * if shared data hasn't even been lazy evaluated yet
125 umtx_lock(&usprepMutex
);
126 if (SHARED_DATA_HASHTABLE
== NULL
) {
127 umtx_unlock(&usprepMutex
);
131 /*creates an enumeration to iterate through every element in the table */
132 while ((e
= uhash_nextElement(SHARED_DATA_HASHTABLE
, &pos
)) != NULL
)
134 profile
= (UStringPrepProfile
*) e
->value
.pointer
;
135 key
= (UStringPrepKey
*) e
->key
.pointer
;
137 if ((noRefCount
== FALSE
&& profile
->refCount
== 0) ||
140 uhash_removeElement(SHARED_DATA_HASHTABLE
, e
);
142 /* unload the data */
143 usprep_unload(profile
);
145 if(key
->name
!= NULL
) {
146 uprv_free(key
->name
);
149 if(key
->path
!= NULL
) {
150 uprv_free(key
->path
);
158 umtx_unlock(&usprepMutex
);
163 /* Works just like ucnv_flushCache()
166 return usprep_internal_flushCache(FALSE);
170 static UBool U_CALLCONV
usprep_cleanup(void){
171 if (SHARED_DATA_HASHTABLE
!= NULL
) {
172 usprep_internal_flushCache(TRUE
);
173 if (SHARED_DATA_HASHTABLE
!= NULL
&& uhash_count(SHARED_DATA_HASHTABLE
) == 0) {
174 uhash_close(SHARED_DATA_HASHTABLE
);
175 SHARED_DATA_HASHTABLE
= NULL
;
179 umtx_destroy(&usprepMutex
); /* Don't worry about destroying the mutex even */
180 /* if the hash table still exists. The mutex */
181 /* will lazily re-init itself if needed. */
182 return (SHARED_DATA_HASHTABLE
== NULL
);
188 umtx_init(&usprepMutex
);
191 /** Initializes the cache for resources */
193 initCache(UErrorCode
*status
) {
194 UBool makeCache
= FALSE
;
195 umtx_lock(&usprepMutex
);
196 makeCache
= (SHARED_DATA_HASHTABLE
== NULL
);
197 umtx_unlock(&usprepMutex
);
199 UHashtable
*newCache
= uhash_open(hashEntry
, compareEntries
, status
);
200 if (U_FAILURE(*status
)) {
203 umtx_lock(&usprepMutex
);
204 if(SHARED_DATA_HASHTABLE
== NULL
) {
205 SHARED_DATA_HASHTABLE
= newCache
;
206 ucln_common_registerCleanup(UCLN_COMMON_USPREP
, usprep_cleanup
);
209 umtx_unlock(&usprepMutex
);
210 if(newCache
!= NULL
) {
211 uhash_close(newCache
);
216 static UBool U_CALLCONV
217 loadData(UStringPrepProfile
* profile
,
221 UErrorCode
* errorCode
) {
222 /* load Unicode SPREP data from file */
223 UTrie _sprepTrie
={ 0,0,0,0,0,0,0 };
224 UDataMemory
*dataMemory
;
225 const int32_t *p
=NULL
;
227 UVersionInfo normUnicodeVersion
;
228 int32_t normUniVer
, sprepUniVer
, normCorrVer
;
230 if(errorCode
==NULL
|| U_FAILURE(*errorCode
)) {
234 /* open the data outside the mutex block */
235 //TODO: change the path
236 dataMemory
=udata_openChoice(path
, type
, name
, isSPrepAcceptable
, NULL
, errorCode
);
237 if(U_FAILURE(*errorCode
)) {
241 p
=(const int32_t *)udata_getMemory(dataMemory
);
242 pb
=(const uint8_t *)(p
+_SPREP_INDEX_TOP
);
243 utrie_unserialize(&_sprepTrie
, pb
, p
[_SPREP_INDEX_TRIE_SIZE
], errorCode
);
244 _sprepTrie
.getFoldingOffset
=getSPrepFoldingOffset
;
247 if(U_FAILURE(*errorCode
)) {
248 udata_close(dataMemory
);
252 /* in the mutex block, set the data for this process */
253 umtx_lock(&usprepMutex
);
254 if(profile
->sprepData
==NULL
) {
255 profile
->sprepData
=dataMemory
;
257 uprv_memcpy(&profile
->indexes
, p
, sizeof(profile
->indexes
));
258 uprv_memcpy(&profile
->sprepTrie
, &_sprepTrie
, sizeof(UTrie
));
260 p
=(const int32_t *)udata_getMemory(profile
->sprepData
);
262 umtx_unlock(&usprepMutex
);
263 /* initialize some variables */
264 profile
->mappingData
=(uint16_t *)((uint8_t *)(p
+_SPREP_INDEX_TOP
)+profile
->indexes
[_SPREP_INDEX_TRIE_SIZE
]);
266 unorm_getUnicodeVersion(&normUnicodeVersion
, errorCode
);
267 normUniVer
= (normUnicodeVersion
[0] << 24) + (normUnicodeVersion
[1] << 16) +
268 (normUnicodeVersion
[2] << 8 ) + (normUnicodeVersion
[3]);
269 sprepUniVer
= (dataVersion
[0] << 24) + (dataVersion
[1] << 16) +
270 (dataVersion
[2] << 8 ) + (dataVersion
[3]);
271 normCorrVer
= profile
->indexes
[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION
];
273 if(U_FAILURE(*errorCode
)){
274 udata_close(dataMemory
);
277 if( normUniVer
< sprepUniVer
&& /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */
278 normUniVer
< normCorrVer
&& /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */
279 ((profile
->indexes
[_SPREP_OPTIONS
] & _SPREP_NORMALIZATION_ON
) > 0) /* normalization turned on*/
281 *errorCode
= U_INVALID_FORMAT_ERROR
;
282 udata_close(dataMemory
);
285 profile
->isDataLoaded
= TRUE
;
287 /* if a different thread set it first, then close the extra data */
288 if(dataMemory
!=NULL
) {
289 udata_close(dataMemory
); /* NULL if it was set correctly */
293 return profile
->isDataLoaded
;
296 static UStringPrepProfile
*
297 usprep_getProfile(const char* path
,
301 UStringPrepProfile
* profile
= NULL
;
305 if(U_FAILURE(*status
)){
309 UStringPrepKey stackKey
;
311 * const is cast way to save malloc, strcpy and free calls
312 * we use the passed in pointers for fetching the data from the
313 * hash table which is safe
315 stackKey
.name
= (char*) name
;
316 stackKey
.path
= (char*) path
;
318 /* fetch the data from the cache */
319 profile
= (UStringPrepProfile
*) (uhash_get(SHARED_DATA_HASHTABLE
,&stackKey
));
322 UStringPrepKey
* key
= (UStringPrepKey
*) uprv_malloc(sizeof(UStringPrepKey
));
324 *status
= U_MEMORY_ALLOCATION_ERROR
;
327 /* else load the data and put the data in the cache */
328 profile
= (UStringPrepProfile
*) uprv_malloc(sizeof(UStringPrepProfile
));
330 *status
= U_MEMORY_ALLOCATION_ERROR
;
335 /* initialize the data struct members */
336 uprv_memset(profile
->indexes
,0,sizeof(profile
->indexes
));
337 profile
->mappingData
= NULL
;
338 profile
->sprepData
= NULL
;
339 profile
->refCount
= 0;
341 /* initialize the key memebers */
342 key
->name
= (char*) uprv_malloc(uprv_strlen(name
)+1);
343 if(key
->name
== NULL
){
344 *status
= U_MEMORY_ALLOCATION_ERROR
;
350 uprv_strcpy(key
->name
, name
);
355 key
->path
= (char*) uprv_malloc(uprv_strlen(path
)+1);
356 if(key
->path
== NULL
){
357 *status
= U_MEMORY_ALLOCATION_ERROR
;
358 uprv_free(key
->path
);
363 uprv_strcpy(key
->path
, path
);
367 if(!loadData(profile
, path
, name
, _SPREP_DATA_TYPE
, status
) || U_FAILURE(*status
) ){
371 /* get the options */
372 profile
->doNFKC
= (UBool
)((profile
->indexes
[_SPREP_OPTIONS
] & _SPREP_NORMALIZATION_ON
) > 0);
373 profile
->checkBiDi
= (UBool
)((profile
->indexes
[_SPREP_OPTIONS
] & _SPREP_CHECK_BIDI_ON
) > 0);
375 umtx_lock(&usprepMutex
);
376 /* add the data object to the cache */
377 uhash_put(SHARED_DATA_HASHTABLE
, key
, profile
, status
);
378 umtx_unlock(&usprepMutex
);
380 umtx_lock(&usprepMutex
);
381 /* increment the refcount */
383 umtx_unlock(&usprepMutex
);
388 U_CAPI UStringPrepProfile
* U_EXPORT2
389 usprep_open(const char* path
,
393 if(status
== NULL
|| U_FAILURE(*status
)){
396 /* initialize the mutex */
399 /* initialize the profile struct members */
400 return usprep_getProfile(path
,name
,status
);;
403 U_CAPI
void U_EXPORT2
404 usprep_close(UStringPrepProfile
* profile
){
409 umtx_lock(&usprepMutex
);
410 /* decrement the ref count*/
411 if(profile
->refCount
> 0){
414 umtx_unlock(&usprepMutex
);
419 uprv_syntaxError(const UChar
* rules
,
422 UParseError
* parseError
){
423 if(parseError
== NULL
){
426 parseError
->offset
= pos
;
427 parseError
->line
= 0 ; // we are not using line numbers
430 int32_t start
= (pos
<=U_PARSE_CONTEXT_LEN
)? 0 : (pos
- (U_PARSE_CONTEXT_LEN
-1));
433 u_memcpy(parseError
->preContext
,rules
+start
,limit
-start
);
434 //null terminate the buffer
435 parseError
->preContext
[limit
-start
] = 0;
437 // for post-context; include error rules[pos]
439 limit
= start
+ (U_PARSE_CONTEXT_LEN
-1);
440 if (limit
> rulesLen
) {
443 if (start
< rulesLen
) {
444 u_memcpy(parseError
->postContext
,rules
+start
,limit
-start
);
446 //null terminate the buffer
447 parseError
->postContext
[limit
-start
]= 0;
451 static inline UStringPrepType
452 getValues(uint16_t trieWord
, int16_t& value
, UBool
& isIndex
){
454 UStringPrepType type
;
457 * Initial value stored in the mapping table
458 * just return USPREP_TYPE_LIMIT .. so that
459 * the source codepoint is copied to the destination
461 type
= USPREP_TYPE_LIMIT
;
462 }else if(trieWord
>= _SPREP_TYPE_THRESHOLD
){
463 type
= (UStringPrepType
) (trieWord
- _SPREP_TYPE_THRESHOLD
);
467 /* ascertain if the value is index or delta */
470 value
= trieWord
>> 2; //mask off the lower 2 bits and shift
474 value
= (int16_t)trieWord
;
475 value
= (value
>> 2);
479 if((trieWord
>>2) == _SPREP_MAX_INDEX_VALUE
){
480 type
= USPREP_DELETE
;
491 usprep_map( const UStringPrepProfile
* profile
,
492 const UChar
* src
, int32_t srcLength
,
493 UChar
* dest
, int32_t destCapacity
,
495 UParseError
* parseError
,
496 UErrorCode
* status
){
501 UBool allowUnassigned
= (UBool
) ((options
& USPREP_ALLOW_UNASSIGNED
)>0);
502 UStringPrepType type
;
505 const int32_t* indexes
= profile
->indexes
;
507 // no error checking the caller check for error and arguments
508 // no string length check the caller finds out the string length
510 for(srcIndex
=0;srcIndex
<srcLength
;){
513 U16_NEXT(src
,srcIndex
,srcLength
,ch
);
517 UTRIE_GET16(&profile
->sprepTrie
,ch
,result
);
519 type
= getValues(result
, value
, isIndex
);
521 // check if the source codepoint is unassigned
522 if(type
== USPREP_UNASSIGNED
&& allowUnassigned
== FALSE
){
524 uprv_syntaxError(src
,srcIndex
-U16_LENGTH(ch
), srcLength
,parseError
);
525 *status
= U_STRINGPREP_UNASSIGNED_ERROR
;
528 }else if(type
== USPREP_MAP
){
530 int32_t index
, length
;
534 if(index
>= indexes
[_SPREP_ONE_UCHAR_MAPPING_INDEX_START
] &&
535 index
< indexes
[_SPREP_TWO_UCHARS_MAPPING_INDEX_START
]){
537 }else if(index
>= indexes
[_SPREP_TWO_UCHARS_MAPPING_INDEX_START
] &&
538 index
< indexes
[_SPREP_THREE_UCHARS_MAPPING_INDEX_START
]){
540 }else if(index
>= indexes
[_SPREP_THREE_UCHARS_MAPPING_INDEX_START
] &&
541 index
< indexes
[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START
]){
544 length
= profile
->mappingData
[index
++];
548 /* copy mapping to destination */
549 for(int32_t i
=0; i
< length
; i
++){
550 if(destIndex
< destCapacity
){
551 dest
[destIndex
] = profile
->mappingData
[index
+i
];
553 destIndex
++; /* for pre-flighting */
557 // subtract the delta to arrive at the code point
561 }else if(type
==USPREP_DELETE
){
562 // just consume the codepoint and contine
565 //copy the code point into destination
567 if(destIndex
< destCapacity
){
568 dest
[destIndex
] = (UChar
)ch
;
572 if(destIndex
+1 < destCapacity
){
573 dest
[destIndex
] = U16_LEAD(ch
);
574 dest
[destIndex
+1] = U16_TRAIL(ch
);
581 return u_terminateUChars(dest
, destCapacity
, destIndex
, status
);
586 usprep_normalize( const UChar
* src
, int32_t srcLength
,
587 UChar
* dest
, int32_t destCapacity
,
588 UErrorCode
* status
){
590 * Option UNORM_BEFORE_PRI_29:
592 * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
593 * requires strict adherence to Unicode 3.2 normalization,
594 * including buggy composition from before fixing Public Review Issue #29.
595 * Note that this results in some valid but nonsensical text to be
596 * either corrupted or rejected, depending on the text.
597 * See http://www.unicode.org/review/resolved-pri.html#pri29
598 * See unorm.cpp and cnormtst.c
600 return unorm_normalize(
602 UNORM_NFKC
, UNORM_UNICODE_3_2
|UNORM_BEFORE_PRI_29
,
609 1) Map -- For each character in the input, check if it has a mapping
610 and, if so, replace it with its mapping.
612 2) Normalize -- Possibly normalize the result of step 1 using Unicode
615 3) Prohibit -- Check for any characters that are not allowed in the
616 output. If any are found, return an error.
618 4) Check bidi -- Possibly check for right-to-left characters, and if
619 any are found, make sure that the whole string satisfies the
620 requirements for bidirectional strings. If the string does not
621 satisfy the requirements for bidirectional strings, return an
623 [Unicode3.2] defines several bidirectional categories; each character
624 has one bidirectional category assigned to it. For the purposes of
625 the requirements below, an "RandALCat character" is a character that
626 has Unicode bidirectional categories "R" or "AL"; an "LCat character"
627 is a character that has Unicode bidirectional category "L". Note
630 that there are many characters which fall in neither of the above
631 definitions; Latin digits (<U+0030> through <U+0039>) are examples of
632 this because they have bidirectional category "EN".
634 In any profile that specifies bidirectional character handling, all
635 three of the following requirements MUST be met:
637 1) The characters in section 5.8 MUST be prohibited.
639 2) If a string contains any RandALCat character, the string MUST NOT
640 contain any LCat character.
642 3) If a string contains any RandALCat character, a RandALCat
643 character MUST be the first character of the string, and a
644 RandALCat character MUST be the last character of the string.
647 #define MAX_STACK_BUFFER_SIZE 300
650 U_CAPI
int32_t U_EXPORT2
651 usprep_prepare( const UStringPrepProfile
* profile
,
652 const UChar
* src
, int32_t srcLength
,
653 UChar
* dest
, int32_t destCapacity
,
655 UParseError
* parseError
,
656 UErrorCode
* status
){
658 // check error status
659 if(status
== NULL
|| U_FAILURE(*status
)){
664 if(profile
==NULL
|| src
==NULL
|| srcLength
<-1 || (dest
==NULL
&& destCapacity
!=0)) {
665 *status
=U_ILLEGAL_ARGUMENT_ERROR
;
669 UChar b1Stack
[MAX_STACK_BUFFER_SIZE
], b2Stack
[MAX_STACK_BUFFER_SIZE
];
670 UChar
*b1
= b1Stack
, *b2
= b2Stack
;
671 int32_t b1Len
, b2Len
=0,
672 b1Capacity
= MAX_STACK_BUFFER_SIZE
,
673 b2Capacity
= MAX_STACK_BUFFER_SIZE
;
676 UCharDirection direction
=U_CHAR_DIRECTION_COUNT
, firstCharDir
=U_CHAR_DIRECTION_COUNT
;
677 UBool leftToRight
=FALSE
, rightToLeft
=FALSE
;
678 int32_t rtlPos
=-1, ltrPos
=-1;
680 //get the string length
682 srcLength
= u_strlen(src
);
685 b1Len
= usprep_map(profile
, src
, srcLength
, b1
, b1Capacity
, options
, parseError
, status
);
687 if(*status
== U_BUFFER_OVERFLOW_ERROR
){
688 // redo processing of string
689 /* we do not have enough room so grow the buffer*/
690 b1
= (UChar
*) uprv_malloc(b1Len
* U_SIZEOF_UCHAR
);
692 *status
= U_MEMORY_ALLOCATION_ERROR
;
696 *status
= U_ZERO_ERROR
; // reset error
698 b1Len
= usprep_map(profile
, src
, srcLength
, b1
, b1Len
, options
, parseError
, status
);
703 if(profile
->doNFKC
== TRUE
){
704 b2Len
= usprep_normalize(b1
,b1Len
, b2
,b2Capacity
,status
);
706 if(*status
== U_BUFFER_OVERFLOW_ERROR
){
707 // redo processing of string
708 /* we do not have enough room so grow the buffer*/
709 b2
= (UChar
*) uprv_malloc(b2Len
* U_SIZEOF_UCHAR
);
711 *status
= U_MEMORY_ALLOCATION_ERROR
;
715 *status
= U_ZERO_ERROR
; // reset error
717 b2Len
= usprep_normalize(b1
,b1Len
, b2
,b2Len
,status
);
727 if(U_FAILURE(*status
)){
732 UStringPrepType type
;
736 // Prohibit and checkBiDi in one pass
737 for(b2Index
=0; b2Index
<b2Len
;){
741 U16_NEXT(b2
, b2Index
, b2Len
, ch
);
743 UTRIE_GET16(&profile
->sprepTrie
,ch
,result
);
745 type
= getValues(result
, value
, isIndex
);
747 if( type
== USPREP_PROHIBITED
||
748 ((result
< _SPREP_TYPE_THRESHOLD
) && (result
& 0x01) /* first bit says it the code point is prohibited*/)
750 *status
= U_STRINGPREP_PROHIBITED_ERROR
;
751 uprv_syntaxError(b1
, b2Index
-U16_LENGTH(ch
), b2Len
, parseError
);
755 direction
= u_charDirection(ch
);
756 if(firstCharDir
== U_CHAR_DIRECTION_COUNT
){
757 firstCharDir
= direction
;
759 if(direction
== U_LEFT_TO_RIGHT
){
763 if(direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
){
768 if(profile
->checkBiDi
== TRUE
){
770 if( leftToRight
== TRUE
&& rightToLeft
== TRUE
){
771 *status
= U_STRINGPREP_CHECK_BIDI_ERROR
;
772 uprv_syntaxError(b2
,(rtlPos
>ltrPos
) ? rtlPos
: ltrPos
, b2Len
, parseError
);
777 if( rightToLeft
== TRUE
&&
778 !((firstCharDir
== U_RIGHT_TO_LEFT
|| firstCharDir
== U_RIGHT_TO_LEFT_ARABIC
) &&
779 (direction
== U_RIGHT_TO_LEFT
|| direction
== U_RIGHT_TO_LEFT_ARABIC
))
781 *status
= U_STRINGPREP_CHECK_BIDI_ERROR
;
782 uprv_syntaxError(b2
, rtlPos
, b2Len
, parseError
);
786 if(b2Len
<= destCapacity
){
787 uprv_memmove(dest
,b2
, b2Len
*U_SIZEOF_UCHAR
);
796 if(b2
!=b1Stack
&& b2
!=b2Stack
&& b2
!=b1
/* b1 should not be freed twice */){
800 return u_terminateUChars(dest
, destCapacity
, b2Len
, status
);
804 /* data swapping ------------------------------------------------------------ */
806 U_CAPI
int32_t U_EXPORT2
807 usprep_swap(const UDataSwapper
*ds
,
808 const void *inData
, int32_t length
, void *outData
,
809 UErrorCode
*pErrorCode
) {
810 const UDataInfo
*pInfo
;
813 const uint8_t *inBytes
;
816 const int32_t *inIndexes
;
819 int32_t i
, offset
, count
, size
;
821 /* udata_swapDataHeader checks the arguments */
822 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
823 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
827 /* check data format and format version */
828 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
830 pInfo
->dataFormat
[0]==0x53 && /* dataFormat="SPRP" */
831 pInfo
->dataFormat
[1]==0x50 &&
832 pInfo
->dataFormat
[2]==0x52 &&
833 pInfo
->dataFormat
[3]==0x50 &&
834 pInfo
->formatVersion
[0]==3
836 udata_printError(ds
, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n",
837 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
838 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
839 pInfo
->formatVersion
[0]);
840 *pErrorCode
=U_UNSUPPORTED_ERROR
;
844 inBytes
=(const uint8_t *)inData
+headerSize
;
845 outBytes
=(uint8_t *)outData
+headerSize
;
847 inIndexes
=(const int32_t *)inBytes
;
852 udata_printError(ds
, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n",
854 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
859 /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */
860 for(i
=0; i
<16; ++i
) {
861 indexes
[i
]=udata_readInt32(ds
, inIndexes
[i
]);
864 /* calculate the total length of the data */
866 16*4+ /* size of indexes[] */
867 indexes
[_SPREP_INDEX_TRIE_SIZE
]+
868 indexes
[_SPREP_INDEX_MAPPING_DATA_SIZE
];
872 udata_printError(ds
, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n",
874 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
878 /* copy the data for inaccessible bytes */
879 if(inBytes
!=outBytes
) {
880 uprv_memcpy(outBytes
, inBytes
, size
);
885 /* swap the int32_t indexes[] */
887 ds
->swapArray32(ds
, inBytes
, count
, outBytes
, pErrorCode
);
891 count
=indexes
[_SPREP_INDEX_TRIE_SIZE
];
892 utrie_swap(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
895 /* swap the uint16_t mappingTable[] */
896 count
=indexes
[_SPREP_INDEX_MAPPING_DATA_SIZE
];
897 ds
->swapArray16(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
901 return headerSize
+size
;
904 #endif /* #if !UCONFIG_NO_IDNA */