// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2003-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  udataswp.h
*   tab size:   8 (not used)
*
*   created on: 2003jun05
*   created by: Markus W. Scherer
*
*   Definitions for ICU data transformations for different platforms,
*   changing between big- and little-endian data and/or between
*   charset families (ASCII<->EBCDIC).
*/
23 #ifndef __UDATASWP_H__
24 #define __UDATASWP_H__
#include <stdarg.h>
#include "unicode/utypes.h"
/* Forward declaration so that the function types below can take UDataSwapper pointers. */
typedef struct UDataSwapper UDataSwapper;
37 * Function type for data transformation.
38 * Transforms data, or just returns the length of the data if
39 * the input length is -1.
40 * Swap functions assume that their data pointers are aligned properly.
42 * Quick implementation outline:
43 * (best to copy and adapt and existing swapper implementation)
44 * check that the data looks like the expected format
47 * never dereference outData
48 * read inData and determine the data size
49 * assume that inData is long enough for this
51 * outData can be NULL if length==0
52 * inData==outData (in-place swapping) possible but not required!
53 * verify that length>=(actual size)
54 * if there is a chance that not every byte up to size is reached
55 * due to padding etc.:
56 * if(inData!=outData) {
57 * memcpy(outData, inData, actual size);
63 * Further implementation notes:
64 * - read integers from inData before swapping them
65 * because in-place swapping can make them unreadable
66 * - compareInvChars compares a local Unicode string with already-swapped
67 * output charset strings
69 * @param ds Pointer to UDataSwapper containing global data about the
70 * transformation and function pointers for handling primitive
72 * @param inData Pointer to the input data to be transformed or examined.
73 * @param length Length of the data, counting bytes. May be -1 for preflighting.
74 * If length>=0, then transform the data.
75 * If length==-1, then only determine the length of the data.
76 * The length cannot be determined from the data itself for all
77 * types of data (e.g., not for simple arrays of integers).
78 * @param outData Pointer to the output data buffer.
79 * If length>=0 (transformation), then the output buffer must
80 * have a capacity of at least length.
81 * If length==-1, then outData will not be used and can be NULL.
82 * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
83 * fulfill U_SUCCESS on input.
84 * @return The actual length of the data.
89 typedef int32_t U_CALLCONV
90 UDataSwapFn(const UDataSwapper
*ds
,
91 const void *inData
, int32_t length
, void *outData
,
92 UErrorCode
*pErrorCode
);
95 * Convert one uint16_t from input to platform endianness.
98 typedef uint16_t U_CALLCONV
99 UDataReadUInt16(uint16_t x
);
102 * Convert one uint32_t from input to platform endianness.
105 typedef uint32_t U_CALLCONV
106 UDataReadUInt32(uint32_t x
);
109 * Convert one uint16_t from platform to input endianness.
112 typedef void U_CALLCONV
113 UDataWriteUInt16(uint16_t *p
, uint16_t x
);
116 * Convert one uint32_t from platform to input endianness.
119 typedef void U_CALLCONV
120 UDataWriteUInt32(uint32_t *p
, uint32_t x
);
123 * Compare invariant-character strings, one in the output data and the
124 * other one caller-provided in Unicode.
125 * An output data string is compared because strings are usually swapped
126 * before the rest of the data, to allow for sorting of string tables
127 * according to the output charset.
128 * You can use -1 for the length parameters of NUL-terminated strings as usual.
129 * Returns Unicode code point order for invariant characters.
132 typedef int32_t U_CALLCONV
133 UDataCompareInvChars(const UDataSwapper
*ds
,
134 const char *outString
, int32_t outLength
,
135 const UChar
*localString
, int32_t localLength
);
138 * Function for message output when an error occurs during data swapping.
139 * A format string and variable number of arguments are passed
140 * like for vprintf().
142 * @param context A function-specific context pointer.
143 * @param fmt The format string.
144 * @param args The arguments for format string inserts.
148 typedef void U_CALLCONV
149 UDataPrintError(void *context
, const char *fmt
, va_list args
);
151 struct UDataSwapper
{
152 /** Input endianness. @internal ICU 2.8 */
154 /** Input charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
156 /** Output endianness. @internal ICU 2.8 */
157 UBool outIsBigEndian
;
158 /** Output charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */
161 /* basic functions for reading data values */
163 /** Convert one uint16_t from input to platform endianness. @internal ICU 2.8 */
164 UDataReadUInt16
*readUInt16
;
165 /** Convert one uint32_t from input to platform endianness. @internal ICU 2.8 */
166 UDataReadUInt32
*readUInt32
;
167 /** Compare an invariant-character output string with a local one. @internal ICU 2.8 */
168 UDataCompareInvChars
*compareInvChars
;
170 /* basic functions for writing data values */
172 /** Convert one uint16_t from platform to input endianness. @internal ICU 2.8 */
173 UDataWriteUInt16
*writeUInt16
;
174 /** Convert one uint32_t from platform to input endianness. @internal ICU 2.8 */
175 UDataWriteUInt32
*writeUInt32
;
177 /* basic functions for data transformations */
179 /** Transform an array of 16-bit integers. @internal ICU 2.8 */
180 UDataSwapFn
*swapArray16
;
181 /** Transform an array of 32-bit integers. @internal ICU 2.8 */
182 UDataSwapFn
*swapArray32
;
183 /** Transform an array of 64-bit integers. @internal ICU 53 */
184 UDataSwapFn
*swapArray64
;
185 /** Transform an invariant-character string. @internal ICU 2.8 */
186 UDataSwapFn
*swapInvChars
;
189 * Function for message output when an error occurs during data swapping.
193 UDataPrintError
*printError
;
194 /** Context pointer for printError. @internal ICU 2.8 */
195 void *printErrorContext
;
200 U_CAPI UDataSwapper
* U_EXPORT2
201 udata_openSwapper(UBool inIsBigEndian
, uint8_t inCharset
,
202 UBool outIsBigEndian
, uint8_t outCharset
,
203 UErrorCode
*pErrorCode
);
206 * Open a UDataSwapper for the given input data and the specified output
208 * Values of -1 for any of the characteristics mean the local platform's
214 U_CAPI UDataSwapper
* U_EXPORT2
215 udata_openSwapperForInputData(const void *data
, int32_t length
,
216 UBool outIsBigEndian
, uint8_t outCharset
,
217 UErrorCode
*pErrorCode
);
219 U_CAPI
void U_EXPORT2
220 udata_closeSwapper(UDataSwapper
*ds
);
223 * Read the beginning of an ICU data piece, recognize magic bytes,
224 * swap the structure.
225 * Set a U_UNSUPPORTED_ERROR if it does not look like an ICU data piece.
227 * @return The size of the data header, in bytes.
231 U_CAPI
int32_t U_EXPORT2
232 udata_swapDataHeader(const UDataSwapper
*ds
,
233 const void *inData
, int32_t length
, void *outData
,
234 UErrorCode
*pErrorCode
);
237 * Convert one int16_t from input to platform endianness.
240 U_CAPI
int16_t U_EXPORT2
241 udata_readInt16(const UDataSwapper
*ds
, int16_t x
);
244 * Convert one int32_t from input to platform endianness.
247 U_CAPI
int32_t U_EXPORT2
248 udata_readInt32(const UDataSwapper
*ds
, int32_t x
);
251 * Swap a block of invariant, NUL-terminated strings, but not padding
252 * bytes after the last string.
255 U_CAPI
int32_t U_EXPORT2
256 udata_swapInvStringBlock(const UDataSwapper
*ds
,
257 const void *inData
, int32_t length
, void *outData
,
258 UErrorCode
*pErrorCode
);
260 U_CAPI
void U_EXPORT2
261 udata_printError(const UDataSwapper
*ds
,
/* internal exports from putil.c -------------------------------------------- */

/* declared here to keep them out of the public putil.h */
270 * Swap invariant char * strings ASCII->EBCDIC.
273 U_CAPI
int32_t U_EXPORT2
274 uprv_ebcdicFromAscii(const UDataSwapper
*ds
,
275 const void *inData
, int32_t length
, void *outData
,
276 UErrorCode
*pErrorCode
);
279 * Copy invariant ASCII char * strings and verify they are invariant.
283 uprv_copyAscii(const UDataSwapper
*ds
,
284 const void *inData
, int32_t length
, void *outData
,
285 UErrorCode
*pErrorCode
);
288 * Swap invariant char * strings EBCDIC->ASCII.
292 uprv_asciiFromEbcdic(const UDataSwapper
*ds
,
293 const void *inData
, int32_t length
, void *outData
,
294 UErrorCode
*pErrorCode
);
297 * Copy invariant EBCDIC char * strings and verify they are invariant.
301 uprv_copyEbcdic(const UDataSwapper
*ds
,
302 const void *inData
, int32_t length
, void *outData
,
303 UErrorCode
*pErrorCode
);
306 * Compare ASCII invariant char * with Unicode invariant UChar *
310 uprv_compareInvAscii(const UDataSwapper
*ds
,
311 const char *outString
, int32_t outLength
,
312 const UChar
*localString
, int32_t localLength
);
315 * Compare EBCDIC invariant char * with Unicode invariant UChar *
319 uprv_compareInvEbcdic(const UDataSwapper
*ds
,
320 const char *outString
, int32_t outLength
,
321 const UChar
*localString
, int32_t localLength
);
/**
 * \def uprv_compareInvWithUChar
 * Compare an invariant-character string with a UChar string.
 * Maps to the ASCII or EBCDIC comparison function according to the
 * charset family selected by U_CHARSET_FAMILY.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_compareInvWithUChar uprv_compareInvAscii
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_compareInvWithUChar uprv_compareInvEbcdic
#else
    /* Neither known family matched: refuse to build rather than guess. */
#   error Unknown charset family!
#endif
// utrie_swap.cpp -----------------------------------------------------------***
339 * Swaps a serialized UTrie.
342 U_CAPI
int32_t U_EXPORT2
343 utrie_swap(const UDataSwapper
*ds
,
344 const void *inData
, int32_t length
, void *outData
,
345 UErrorCode
*pErrorCode
);
348 * Swaps a serialized UTrie2.
351 U_CAPI
int32_t U_EXPORT2
352 utrie2_swap(const UDataSwapper
*ds
,
353 const void *inData
, int32_t length
, void *outData
,
354 UErrorCode
*pErrorCode
);
357 * Swaps a serialized UCPTrie.
360 U_CAPI
int32_t U_EXPORT2
361 ucptrie_swap(const UDataSwapper
*ds
,
362 const void *inData
, int32_t length
, void *outData
,
363 UErrorCode
*pErrorCode
);
366 * Swaps a serialized UTrie, UTrie2, or UCPTrie.
369 U_CAPI
int32_t U_EXPORT2
370 utrie_swapAnyVersion(const UDataSwapper
*ds
,
371 const void *inData
, int32_t length
, void *outData
,
372 UErrorCode
*pErrorCode
);
/* material... -------------------------------------------------------------- */
381 * Public API function in udata.c
383 * Same as udata_openChoice() but automatically swaps the data.
384 * isAcceptable, if not NULL, may accept data with endianness and charset family
385 * different from the current platform's properties.
386 * If the data is acceptable and the platform properties do not match, then
387 * the swap function is called to swap an allocated version of the data.
388 * Preflighting may or may not be performed depending on whether the size of
389 * the loaded data item is known.
391 * @param isAcceptable Same as for udata_openChoice(). May be NULL.
395 U_CAPI UDataMemory
* U_EXPORT2
396 udata_openSwap(const char *path
, const char *type
, const char *name
,
397 UDataMemoryIsAcceptable
*isAcceptable
, void *isAcceptableContext
,
399 UDataPrintError
*printError
, void *printErrorContext
,
400 UErrorCode
*pErrorCode
);