// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2005-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  swapimpl.cpp
*   tab size:   8 (not used)
*
*   created on: 2005may05
*   created by: Markus W. Scherer
*
*   Data file swapping functions moved here from the common library
*   because some data is hardcoded in ICU4C and need not be swapped any more.
*   Moving the functions here simplifies testing (for code coverage) because
*   we need not jump through hoops (like adding snapshots of these files
*   to testdata).
*
*   The declarations for these functions remain in the internal header files
*   in icu/source/common/
*/
28 #include "unicode/utypes.h"
29 #include "unicode/putil.h"
30 #include "unicode/udata.h"
32 /* Explicit include statement for std_string.h is needed
33 * for compilation on certain platforms. (e.g. AIX/VACPP)
35 #include "unicode/std_string.h"
44 #include "ulayout_props.h"
46 /* swapping implementations in common */
52 #include "ubidi_props.h"
56 #include "normalizer2impl.h"
62 #include "dictionarydata.h"
64 /* swapping implementations in i18n */
66 #if !UCONFIG_NO_NORMALIZATION
67 #include "uspoof_impl.h"
74 /* Unicode property (value) aliases data swapping --------------------------- */
76 static int32_t U_CALLCONV
77 upname_swap(const UDataSwapper
*ds
,
78 const void *inData
, int32_t length
, void *outData
,
79 UErrorCode
*pErrorCode
) {
80 /* udata_swapDataHeader checks the arguments */
81 int32_t headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
82 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
86 /* check data format and format version */
87 const UDataInfo
*pInfo
=
88 reinterpret_cast<const UDataInfo
*>(
89 static_cast<const char *>(inData
)+4);
91 pInfo
->dataFormat
[0]==0x70 && /* dataFormat="pnam" */
92 pInfo
->dataFormat
[1]==0x6e &&
93 pInfo
->dataFormat
[2]==0x61 &&
94 pInfo
->dataFormat
[3]==0x6d &&
95 pInfo
->formatVersion
[0]==2
97 udata_printError(ds
, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
98 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
99 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
100 pInfo
->formatVersion
[0]);
101 *pErrorCode
=U_UNSUPPORTED_ERROR
;
105 const uint8_t *inBytes
=static_cast<const uint8_t *>(inData
)+headerSize
;
106 uint8_t *outBytes
=static_cast<uint8_t *>(outData
)+headerSize
;
110 // formatVersion 2 initially has indexes[8], 32 bytes.
112 udata_printError(ds
, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
114 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
119 const int32_t *inIndexes
=reinterpret_cast<const int32_t *>(inBytes
);
120 int32_t totalSize
=udata_readInt32(ds
, inIndexes
[PropNameData::IX_TOTAL_SIZE
]);
122 if(length
<totalSize
) {
123 udata_printError(ds
, "upname_swap(): too few bytes (%d after header, should be %d) "
125 (int)length
, (int)totalSize
);
126 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
130 int32_t numBytesIndexesAndValueMaps
=
131 udata_readInt32(ds
, inIndexes
[PropNameData::IX_BYTE_TRIES_OFFSET
]);
133 // Swap the indexes[] and the valueMaps[].
134 ds
->swapArray32(ds
, inBytes
, numBytesIndexesAndValueMaps
, outBytes
, pErrorCode
);
136 // Copy the rest of the data.
137 if(inBytes
!=outBytes
) {
138 uprv_memcpy(outBytes
+numBytesIndexesAndValueMaps
,
139 inBytes
+numBytesIndexesAndValueMaps
,
140 totalSize
-numBytesIndexesAndValueMaps
);
143 // We need not swap anything else:
145 // The ByteTries are already byte-serialized, and are fixed on ASCII.
146 // (On an EBCDIC machine, the input string is converted to lowercase ASCII
149 // The name groups are mostly invariant characters, but since we only
150 // generate, and keep in subversion, ASCII versions of pnames.icu,
151 // and since only ICU4J uses the pnames.icu data file
152 // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
153 // we just copy those bytes too.
156 return headerSize
+totalSize
;
159 /* Unicode properties data swapping ----------------------------------------- */
161 static int32_t U_CALLCONV
162 uprops_swap(const UDataSwapper
*ds
,
163 const void *inData
, int32_t length
, void *outData
,
164 UErrorCode
*pErrorCode
) {
165 const UDataInfo
*pInfo
;
166 int32_t headerSize
, i
;
168 int32_t dataIndexes
[UPROPS_INDEX_COUNT
];
169 const int32_t *inData32
;
171 /* udata_swapDataHeader checks the arguments */
172 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
173 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
177 /* check data format and format version */
178 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
180 pInfo
->dataFormat
[0]==0x55 && /* dataFormat="UPro" */
181 pInfo
->dataFormat
[1]==0x50 &&
182 pInfo
->dataFormat
[2]==0x72 &&
183 pInfo
->dataFormat
[3]==0x6f &&
184 (3<=pInfo
->formatVersion
[0] && pInfo
->formatVersion
[0]<=7) &&
185 (pInfo
->formatVersion
[0]>=7 ||
186 (pInfo
->formatVersion
[2]==UTRIE_SHIFT
&&
187 pInfo
->formatVersion
[3]==UTRIE_INDEX_SHIFT
))
189 udata_printError(ds
, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
190 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
191 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
192 pInfo
->formatVersion
[0]);
193 *pErrorCode
=U_UNSUPPORTED_ERROR
;
197 /* the properties file must contain at least the indexes array */
198 if(length
>=0 && (length
-headerSize
)<(int32_t)sizeof(dataIndexes
)) {
199 udata_printError(ds
, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
201 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
205 /* read the indexes */
206 inData32
=(const int32_t *)((const char *)inData
+headerSize
);
207 for(i
=0; i
<UPROPS_INDEX_COUNT
; ++i
) {
208 dataIndexes
[i
]=udata_readInt32(ds
, inData32
[i
]);
212 * comments are copied from the data format description in genprops/store.c
213 * indexes[] constants are in uprops.h
220 * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
221 * In earlier formatVersions, it is 0 and a lower dataIndexes entry
222 * has the top of the last item.
224 for(i
=UPROPS_DATA_TOP_INDEX
; i
>0 && (dataTop
=dataIndexes
[i
])==0; --i
) {}
226 if((length
-headerSize
)<(4*dataTop
)) {
227 udata_printError(ds
, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
229 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
233 outData32
=(int32_t *)((char *)outData
+headerSize
);
235 /* copy everything for inaccessible data (padding) */
236 if(inData32
!=outData32
) {
237 uprv_memcpy(outData32
, inData32
, 4*(size_t)dataTop
);
240 /* swap the indexes[16] */
241 ds
->swapArray32(ds
, inData32
, 4*UPROPS_INDEX_COUNT
, outData32
, pErrorCode
);
244 * swap the main properties UTrie
245 * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
247 utrie_swapAnyVersion(ds
,
248 inData32
+UPROPS_INDEX_COUNT
,
249 4*(dataIndexes
[UPROPS_PROPS32_INDEX
]-UPROPS_INDEX_COUNT
),
250 outData32
+UPROPS_INDEX_COUNT
,
254 * swap the properties and exceptions words
255 * P const uint32_t props32[i1-i0];
256 * E const uint32_t exceptions[i2-i1];
259 inData32
+dataIndexes
[UPROPS_PROPS32_INDEX
],
260 4*(dataIndexes
[UPROPS_EXCEPTIONS_TOP_INDEX
]-dataIndexes
[UPROPS_PROPS32_INDEX
]),
261 outData32
+dataIndexes
[UPROPS_PROPS32_INDEX
],
266 * U const UChar uchars[2*(i3-i2)];
269 inData32
+dataIndexes
[UPROPS_EXCEPTIONS_TOP_INDEX
],
270 4*(dataIndexes
[UPROPS_ADDITIONAL_TRIE_INDEX
]-dataIndexes
[UPROPS_EXCEPTIONS_TOP_INDEX
]),
271 outData32
+dataIndexes
[UPROPS_EXCEPTIONS_TOP_INDEX
],
275 * swap the additional UTrie
276 * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
278 utrie_swapAnyVersion(ds
,
279 inData32
+dataIndexes
[UPROPS_ADDITIONAL_TRIE_INDEX
],
280 4*(dataIndexes
[UPROPS_ADDITIONAL_VECTORS_INDEX
]-dataIndexes
[UPROPS_ADDITIONAL_TRIE_INDEX
]),
281 outData32
+dataIndexes
[UPROPS_ADDITIONAL_TRIE_INDEX
],
285 * swap the properties vectors
286 * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
289 inData32
+dataIndexes
[UPROPS_ADDITIONAL_VECTORS_INDEX
],
290 4*(dataIndexes
[UPROPS_SCRIPT_EXTENSIONS_INDEX
]-dataIndexes
[UPROPS_ADDITIONAL_VECTORS_INDEX
]),
291 outData32
+dataIndexes
[UPROPS_ADDITIONAL_VECTORS_INDEX
],
294 // swap the Script_Extensions data
295 // SCX const uint16_t scriptExtensions[2*(i7-i6)];
297 inData32
+dataIndexes
[UPROPS_SCRIPT_EXTENSIONS_INDEX
],
298 4*(dataIndexes
[UPROPS_RESERVED_INDEX_7
]-dataIndexes
[UPROPS_SCRIPT_EXTENSIONS_INDEX
]),
299 outData32
+dataIndexes
[UPROPS_SCRIPT_EXTENSIONS_INDEX
],
303 /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
304 return headerSize
+4*dataIndexes
[UPROPS_RESERVED_INDEX_7
];
307 /* Unicode case mapping data swapping --------------------------------------- */
309 static int32_t U_CALLCONV
310 ucase_swap(const UDataSwapper
*ds
,
311 const void *inData
, int32_t length
, void *outData
,
312 UErrorCode
*pErrorCode
) {
313 const UDataInfo
*pInfo
;
316 const uint8_t *inBytes
;
319 const int32_t *inIndexes
;
322 int32_t i
, offset
, count
, size
;
324 /* udata_swapDataHeader checks the arguments */
325 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
326 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
330 /* check data format and format version */
331 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
333 pInfo
->dataFormat
[0]==UCASE_FMT_0
&& /* dataFormat="cAsE" */
334 pInfo
->dataFormat
[1]==UCASE_FMT_1
&&
335 pInfo
->dataFormat
[2]==UCASE_FMT_2
&&
336 pInfo
->dataFormat
[3]==UCASE_FMT_3
&&
337 ((pInfo
->formatVersion
[0]==1 &&
338 pInfo
->formatVersion
[2]==UTRIE_SHIFT
&&
339 pInfo
->formatVersion
[3]==UTRIE_INDEX_SHIFT
) ||
340 (2<=pInfo
->formatVersion
[0] && pInfo
->formatVersion
[0]<=4))
342 udata_printError(ds
, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
343 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
344 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
345 pInfo
->formatVersion
[0]);
346 *pErrorCode
=U_UNSUPPORTED_ERROR
;
350 inBytes
=(const uint8_t *)inData
+headerSize
;
351 outBytes
=(uint8_t *)outData
+headerSize
;
353 inIndexes
=(const int32_t *)inBytes
;
358 udata_printError(ds
, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
360 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
365 /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */
366 for(i
=0; i
<16; ++i
) {
367 indexes
[i
]=udata_readInt32(ds
, inIndexes
[i
]);
370 /* get the total length of the data */
371 size
=indexes
[UCASE_IX_LENGTH
];
375 udata_printError(ds
, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
377 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
381 /* copy the data for inaccessible bytes */
382 if(inBytes
!=outBytes
) {
383 uprv_memcpy(outBytes
, inBytes
, size
);
388 /* swap the int32_t indexes[] */
389 count
=indexes
[UCASE_IX_INDEX_TOP
]*4;
390 ds
->swapArray32(ds
, inBytes
, count
, outBytes
, pErrorCode
);
394 count
=indexes
[UCASE_IX_TRIE_SIZE
];
395 utrie_swapAnyVersion(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
398 /* swap the uint16_t exceptions[] and unfold[] */
399 count
=(indexes
[UCASE_IX_EXC_LENGTH
]+indexes
[UCASE_IX_UNFOLD_LENGTH
])*2;
400 ds
->swapArray16(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
403 U_ASSERT(offset
==size
);
406 return headerSize
+size
;
409 /* Unicode bidi/shaping data swapping --------------------------------------- */
411 static int32_t U_CALLCONV
412 ubidi_swap(const UDataSwapper
*ds
,
413 const void *inData
, int32_t length
, void *outData
,
414 UErrorCode
*pErrorCode
) {
415 const UDataInfo
*pInfo
;
418 const uint8_t *inBytes
;
421 const int32_t *inIndexes
;
424 int32_t i
, offset
, count
, size
;
426 /* udata_swapDataHeader checks the arguments */
427 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
428 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
432 /* check data format and format version */
433 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
435 pInfo
->dataFormat
[0]==UBIDI_FMT_0
&& /* dataFormat="BiDi" */
436 pInfo
->dataFormat
[1]==UBIDI_FMT_1
&&
437 pInfo
->dataFormat
[2]==UBIDI_FMT_2
&&
438 pInfo
->dataFormat
[3]==UBIDI_FMT_3
&&
439 ((pInfo
->formatVersion
[0]==1 &&
440 pInfo
->formatVersion
[2]==UTRIE_SHIFT
&&
441 pInfo
->formatVersion
[3]==UTRIE_INDEX_SHIFT
) ||
442 pInfo
->formatVersion
[0]==2)
444 udata_printError(ds
, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
445 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
446 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
447 pInfo
->formatVersion
[0]);
448 *pErrorCode
=U_UNSUPPORTED_ERROR
;
452 inBytes
=(const uint8_t *)inData
+headerSize
;
453 outBytes
=(uint8_t *)outData
+headerSize
;
455 inIndexes
=(const int32_t *)inBytes
;
460 udata_printError(ds
, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
462 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
467 /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
468 for(i
=0; i
<16; ++i
) {
469 indexes
[i
]=udata_readInt32(ds
, inIndexes
[i
]);
472 /* get the total length of the data */
473 size
=indexes
[UBIDI_IX_LENGTH
];
477 udata_printError(ds
, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
479 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
483 /* copy the data for inaccessible bytes */
484 if(inBytes
!=outBytes
) {
485 uprv_memcpy(outBytes
, inBytes
, size
);
490 /* swap the int32_t indexes[] */
491 count
=indexes
[UBIDI_IX_INDEX_TOP
]*4;
492 ds
->swapArray32(ds
, inBytes
, count
, outBytes
, pErrorCode
);
496 count
=indexes
[UBIDI_IX_TRIE_SIZE
];
497 utrie_swapAnyVersion(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
500 /* swap the uint32_t mirrors[] */
501 count
=indexes
[UBIDI_IX_MIRROR_LENGTH
]*4;
502 ds
->swapArray32(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
505 /* just skip the uint8_t jgArray[] and jgArray2[] */
506 count
=indexes
[UBIDI_IX_JG_LIMIT
]-indexes
[UBIDI_IX_JG_START
];
508 count
=indexes
[UBIDI_IX_JG_LIMIT2
]-indexes
[UBIDI_IX_JG_START2
];
511 U_ASSERT(offset
==size
);
514 return headerSize
+size
;
517 /* Unicode normalization data swapping -------------------------------------- */
519 #if !UCONFIG_NO_NORMALIZATION
521 static int32_t U_CALLCONV
522 unorm_swap(const UDataSwapper
*ds
,
523 const void *inData
, int32_t length
, void *outData
,
524 UErrorCode
*pErrorCode
) {
525 const UDataInfo
*pInfo
;
528 const uint8_t *inBytes
;
531 const int32_t *inIndexes
;
534 int32_t i
, offset
, count
, size
;
536 /* udata_swapDataHeader checks the arguments */
537 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
538 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
542 /* check data format and format version */
543 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
545 pInfo
->dataFormat
[0]==0x4e && /* dataFormat="Norm" */
546 pInfo
->dataFormat
[1]==0x6f &&
547 pInfo
->dataFormat
[2]==0x72 &&
548 pInfo
->dataFormat
[3]==0x6d &&
549 pInfo
->formatVersion
[0]==2
551 udata_printError(ds
, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
552 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
553 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
554 pInfo
->formatVersion
[0]);
555 *pErrorCode
=U_UNSUPPORTED_ERROR
;
559 inBytes
=(const uint8_t *)inData
+headerSize
;
560 outBytes
=(uint8_t *)outData
+headerSize
;
562 inIndexes
=(const int32_t *)inBytes
;
567 udata_printError(ds
, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
569 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
574 /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
575 for(i
=0; i
<32; ++i
) {
576 indexes
[i
]=udata_readInt32(ds
, inIndexes
[i
]);
579 /* calculate the total length of the data */
581 32*4+ /* size of indexes[] */
582 indexes
[_NORM_INDEX_TRIE_SIZE
]+
583 indexes
[_NORM_INDEX_UCHAR_COUNT
]*2+
584 indexes
[_NORM_INDEX_COMBINE_DATA_COUNT
]*2+
585 indexes
[_NORM_INDEX_FCD_TRIE_SIZE
]+
586 indexes
[_NORM_INDEX_AUX_TRIE_SIZE
]+
587 indexes
[_NORM_INDEX_CANON_SET_COUNT
]*2;
591 udata_printError(ds
, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
593 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
597 /* copy the data for inaccessible bytes */
598 if(inBytes
!=outBytes
) {
599 uprv_memcpy(outBytes
, inBytes
, size
);
604 /* swap the indexes[] */
606 ds
->swapArray32(ds
, inBytes
, count
, outBytes
, pErrorCode
);
609 /* swap the main UTrie */
610 count
=indexes
[_NORM_INDEX_TRIE_SIZE
];
611 utrie_swap(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
614 /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
615 count
=(indexes
[_NORM_INDEX_UCHAR_COUNT
]+indexes
[_NORM_INDEX_COMBINE_DATA_COUNT
])*2;
616 ds
->swapArray16(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
619 /* swap the FCD UTrie */
620 count
=indexes
[_NORM_INDEX_FCD_TRIE_SIZE
];
622 utrie_swap(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
626 /* swap the aux UTrie */
627 count
=indexes
[_NORM_INDEX_AUX_TRIE_SIZE
];
629 utrie_swap(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
633 /* swap the uint16_t combiningTable[] */
634 count
=indexes
[_NORM_INDEX_CANON_SET_COUNT
]*2;
635 ds
->swapArray16(ds
, inBytes
+offset
, count
, outBytes
+offset
, pErrorCode
);
639 return headerSize
+size
;
644 // Unicode text layout properties data swapping --------------------------------
646 static int32_t U_CALLCONV
647 ulayout_swap(const UDataSwapper
*ds
,
648 const void *inData
, int32_t length
, void *outData
,
649 UErrorCode
*pErrorCode
) {
650 // udata_swapDataHeader checks the arguments.
651 int32_t headerSize
= udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
652 if (pErrorCode
== nullptr || U_FAILURE(*pErrorCode
)) {
656 // Check data format and format version.
657 const UDataInfo
*pInfo
= (const UDataInfo
*)((const char *)inData
+ 4);
659 pInfo
->dataFormat
[0] == ULAYOUT_FMT_0
&& // dataFormat="Layo"
660 pInfo
->dataFormat
[1] == ULAYOUT_FMT_1
&&
661 pInfo
->dataFormat
[2] == ULAYOUT_FMT_2
&&
662 pInfo
->dataFormat
[3] == ULAYOUT_FMT_3
&&
663 pInfo
->formatVersion
[0] == 1)) {
665 "ulayout_swap(): data format %02x.%02x.%02x.%02x (format version %02x) "
666 "is not recognized as text layout properties data\n",
667 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
668 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
669 pInfo
->formatVersion
[0]);
670 *pErrorCode
= U_UNSUPPORTED_ERROR
;
674 const uint8_t *inBytes
= (const uint8_t *)inData
+ headerSize
;
675 uint8_t *outBytes
= (uint8_t *)outData
+ headerSize
;
677 const int32_t *inIndexes
= (const int32_t *)inBytes
;
680 length
-= headerSize
;
681 if (length
< 12 * 4) {
683 "ulayout_swap(): too few bytes (%d after header) for text layout properties data\n",
685 *pErrorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
690 int32_t indexesLength
= udata_readInt32(ds
, inIndexes
[ULAYOUT_IX_INDEXES_LENGTH
]);
691 if (indexesLength
< 12) {
693 "ulayout_swap(): too few indexes (%d) for text layout properties data\n",
695 *pErrorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
699 // Read the data offsets before swapping anything.
700 int32_t indexes
[ULAYOUT_IX_TRIES_TOP
+ 1];
701 for (int32_t i
= ULAYOUT_IX_INPC_TRIE_TOP
; i
<= ULAYOUT_IX_TRIES_TOP
; ++i
) {
702 indexes
[i
] = udata_readInt32(ds
, inIndexes
[i
]);
704 int32_t size
= indexes
[ULAYOUT_IX_TRIES_TOP
];
709 "ulayout_swap(): too few bytes (%d after header) "
710 "for all of text layout properties data\n",
712 *pErrorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
716 // Copy the data for inaccessible bytes.
717 if (inBytes
!= outBytes
) {
718 uprv_memcpy(outBytes
, inBytes
, size
);
721 // Swap the int32_t indexes[].
723 int32_t count
= indexesLength
* 4;
724 ds
->swapArray32(ds
, inBytes
, count
, outBytes
, pErrorCode
);
728 for (int32_t i
= ULAYOUT_IX_INPC_TRIE_TOP
; i
<= ULAYOUT_IX_TRIES_TOP
; ++i
) {
729 int32_t top
= indexes
[i
];
730 count
= top
- offset
;
731 U_ASSERT(count
>= 0);
733 utrie_swapAnyVersion(ds
, inBytes
+ offset
, count
, outBytes
+ offset
, pErrorCode
);
738 U_ASSERT(offset
== size
);
741 return headerSize
+ size
;
744 /* Swap 'Test' data from gentest */
745 static int32_t U_CALLCONV
746 test_swap(const UDataSwapper
*ds
,
747 const void *inData
, int32_t length
, void *outData
,
748 UErrorCode
*pErrorCode
) {
749 const UDataInfo
*pInfo
;
752 const uint8_t *inBytes
;
757 /* udata_swapDataHeader checks the arguments */
758 headerSize
=udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
759 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
760 udata_printError(ds
, "test_swap(): data header swap failed %s\n", pErrorCode
!= NULL
? u_errorName(*pErrorCode
) : "pErrorCode is NULL");
764 /* check data format and format version */
765 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
767 pInfo
->dataFormat
[0]==0x54 && /* dataFormat="Norm" */
768 pInfo
->dataFormat
[1]==0x65 &&
769 pInfo
->dataFormat
[2]==0x73 &&
770 pInfo
->dataFormat
[3]==0x74 &&
771 pInfo
->formatVersion
[0]==1
773 udata_printError(ds
, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
774 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
775 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
776 pInfo
->formatVersion
[0]);
777 *pErrorCode
=U_UNSUPPORTED_ERROR
;
781 inBytes
=(const uint8_t *)inData
+headerSize
;
782 outBytes
=(uint8_t *)outData
+headerSize
;
784 int32_t size16
= 2; // 16bit plus padding
785 int32_t sizeStr
= 5; // 4 char inv-str plus null
786 int32_t size
= size16
+ sizeStr
;
790 udata_printError(ds
, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
792 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
797 /* swap a 1 entry array */
798 ds
->swapArray16(ds
, inBytes
+offset
, size16
, outBytes
+offset
, pErrorCode
);
800 ds
->swapInvChars(ds
, inBytes
+offset
, sizeStr
, outBytes
+offset
, pErrorCode
);
803 return headerSize
+size
;
806 /* swap any data (except a .dat package) ------------------------------------ */
808 static const struct {
809 uint8_t dataFormat
[4];
812 { { 0x52, 0x65, 0x73, 0x42 }, ures_swap
}, /* dataFormat="ResB" */
813 #if !UCONFIG_NO_LEGACY_CONVERSION
814 { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap
}, /* dataFormat="cnvt" */
816 #if !UCONFIG_NO_CONVERSION
817 { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases
}, /* dataFormat="CvAl" */
820 { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap
}, /* dataFormat="SPRP" */
822 /* insert data formats here, descending by expected frequency of occurrence */
823 { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap
}, /* dataFormat="UPro" */
825 { { UCASE_FMT_0
, UCASE_FMT_1
, UCASE_FMT_2
, UCASE_FMT_3
},
826 ucase_swap
}, /* dataFormat="cAsE" */
828 { { UBIDI_FMT_0
, UBIDI_FMT_1
, UBIDI_FMT_2
, UBIDI_FMT_3
},
829 ubidi_swap
}, /* dataFormat="BiDi" */
831 #if !UCONFIG_NO_NORMALIZATION
832 { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap
}, /* dataFormat="Norm" */
833 { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap
}, /* dataFormat="Nrm2" */
836 { { ULAYOUT_FMT_0
, ULAYOUT_FMT_1
, ULAYOUT_FMT_2
, ULAYOUT_FMT_3
},
837 ulayout_swap
}, // dataFormat="Layo"
839 #if !UCONFIG_NO_COLLATION
840 { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap
}, /* dataFormat="UCol" */
841 { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA
},/* dataFormat="InvC" */
843 #if !UCONFIG_NO_BREAK_ITERATION
844 { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap
}, /* dataFormat="Brk " */
845 { { 0x44, 0x69, 0x63, 0x74 }, udict_swap
}, /* dataFormat="Dict" */
847 { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap
}, /* dataFormat="pnam" */
848 { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames
}, /* dataFormat="unam" */
849 #if !UCONFIG_NO_NORMALIZATION
850 { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap
}, /* dataFormat="Cfu " */
852 { { 0x54, 0x65, 0x73, 0x74 }, test_swap
} /* dataFormat="Test" */
855 U_CAPI
int32_t U_EXPORT2
856 udata_swap(const UDataSwapper
*ds
,
857 const void *inData
, int32_t length
, void *outData
,
858 UErrorCode
*pErrorCode
) {
859 char dataFormatChars
[4];
860 const UDataInfo
*pInfo
;
861 int32_t i
, swappedLength
;
863 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
868 * Preflight the header first; checks for illegal arguments, too.
869 * Do not swap the header right away because the format-specific swapper
870 * will swap it, get the headerSize again, and also use the header
871 * information. Otherwise we would have to pass some of the information
872 * and not be able to use the UDataSwapFn signature.
874 udata_swapDataHeader(ds
, inData
, -1, NULL
, pErrorCode
);
877 * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
878 * then we could check here for further known magic values and structures.
880 if(U_FAILURE(*pErrorCode
)) {
881 return 0; /* the data format was not recognized */
884 pInfo
=(const UDataInfo
*)((const char *)inData
+4);
887 /* convert the data format from ASCII to Unicode to the system charset */
889 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
890 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3]
893 if(uprv_isInvariantUString(u
, 4)) {
894 u_UCharsToChars(u
, dataFormatChars
, 4);
896 dataFormatChars
[0]=dataFormatChars
[1]=dataFormatChars
[2]=dataFormatChars
[3]='?';
900 /* dispatch to the swap function for the dataFormat */
901 for(i
=0; i
<UPRV_LENGTHOF(swapFns
); ++i
) {
902 if(0==memcmp(swapFns
[i
].dataFormat
, pInfo
->dataFormat
, 4)) {
903 swappedLength
=swapFns
[i
].swapFn(ds
, inData
, length
, outData
, pErrorCode
);
905 if(U_FAILURE(*pErrorCode
)) {
906 udata_printError(ds
, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
907 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
908 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
909 dataFormatChars
[0], dataFormatChars
[1],
910 dataFormatChars
[2], dataFormatChars
[3],
911 u_errorName(*pErrorCode
));
912 } else if(swappedLength
<(length
-15)) {
913 /* swapped less than expected */
914 udata_printError(ds
, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
915 swappedLength
, length
,
916 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
917 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
918 dataFormatChars
[0], dataFormatChars
[1],
919 dataFormatChars
[2], dataFormatChars
[3],
920 u_errorName(*pErrorCode
));
923 return swappedLength
;
927 /* the dataFormat was not recognized */
928 udata_printError(ds
, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
929 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1],
930 pInfo
->dataFormat
[2], pInfo
->dataFormat
[3],
931 dataFormatChars
[0], dataFormatChars
[1],
932 dataFormatChars
[2], dataFormatChars
[3]);
934 *pErrorCode
=U_UNSUPPORTED_ERROR
;