]>
git.saurik.com Git - apple/hfs.git/blob - livefiles_hfs_plugin/lf_hfs_sbunicode.h
1 /* Copyright © 2017-2018 Apple Inc. All rights reserved.
6 * Created by Oded Shoshani on 31/1/18.
9 #ifndef lf_hfs_sbunicode_h
10 #define lf_hfs_sbunicode_h
13 Includes Unicode 3.2 decomposition code derived from Core Foundation
17 * UTF-8 (Unicode Transformation Format)
19 * UTF-8 is the Unicode Transformation Format that serializes a Unicode
20 * character as a sequence of one to four bytes. Only the shortest form
21 * required to represent the significant Unicode bits is legal.
23 * UTF-8 Multibyte Codes
25 * Bytes Bits Unicode Min Unicode Max UTF-8 Byte Sequence (binary)
26 * -----------------------------------------------------------------------------
27 * 1 7 0x0000 0x007F 0xxxxxxx
28 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
29 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
30 * 4 21 0x10000 0x10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
31 * -----------------------------------------------------------------------------
35 * UTF-8 encode/decode flags
/*
 * UTF-8 encode/decode flags.
 *
 * Each flag occupies its own bit, so several can be combined in the
 * `flags` argument of the utf8_* conversion routines.
 */
#define UTF_REVERSE_ENDIAN   0x0001  /* reverse UCS-2 byte order */
#define UTF_ADD_NULL_TERM    0x0002  /* add null termination */
#define UTF_DECOMPOSED       0x0004  /* generate fully decomposed UCS-2 */
#define UTF_PRECOMPOSED      0x0008  /* generate precomposed UCS-2 */
#define UTF_ESCAPE_ILLEGAL   0x0010  /* escape illegal UTF-8 */
#define UTF_SFM_CONVERSIONS  0x0020  /* Use SFM mappings for illegal NTFS chars */

/*
 * Fixed-endianness selectors.
 *
 * Each one evaluates to 0 when BYTE_ORDER already equals the requested
 * byte order, and to UTF_REVERSE_ENDIAN otherwise.
 *
 * NOTE(review): BYTE_ORDER, BIG_ENDIAN and LITTLE_ENDIAN are not defined in
 * the visible part of this header; presumably they come from the platform's
 * endian header -- confirm that it is included before these macros are used.
 */
#define UTF_BIG_ENDIAN \
        ((BYTE_ORDER == BIG_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN)
#define UTF_LITTLE_ENDIAN \
        ((BYTE_ORDER == LITTLE_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN)
52 * utf8_encodelen - Calculate the UTF-8 encoding length
54 * This function takes a Unicode input string, ucsp, of ucslen bytes
55 * and calculates the size of the UTF-8 output in bytes (not including
56 * a NULL termination byte). The string must reside in kernel memory.
58 * If '/' chars are possible in the Unicode input then an alternate
59 * (replacement) char should be provided in altslash.
62 * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime
64 * UTF_BIG_ENDIAN: Unicode byte order is always big endian
66 * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian
68 * UTF_DECOMPOSED: generate fully decomposed output
70 * UTF_PRECOMPOSED is ignored since utf8_encodestr doesn't support it
/*
 * utf8_encodelen - calculate the UTF-8 encoding length of a Unicode string.
 *
 * ucsp      Unicode (UCS-2) input string.
 * ucslen    Length of the input string, in bytes.
 * altslash  Alternate (replacement) character to supply when '/' characters
 *           are possible in the Unicode input.
 * flags     UTF_* flags: UTF_REVERSE_ENDIAN, UTF_BIG_ENDIAN,
 *           UTF_LITTLE_ENDIAN and UTF_DECOMPOSED are honoured;
 *           UTF_PRECOMPOSED is ignored.
 *
 * Returns the size of the UTF-8 output in bytes, not including a NULL
 * termination byte.
 */
size_t utf8_encodelen(const u_int16_t *ucsp, size_t ucslen,
                      u_int16_t altslash, int flags);
78 * utf8_encodestr - Encodes a Unicode string to UTF-8
81 * The resulting UTF-8 string is NULL terminated.
83 * If '/' chars are allowed on disk then an alternate
84 * (replacement) char must be provided in altslash.
87 * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime
89 * UTF_BIG_ENDIAN: Unicode byte order is always big endian
91 * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian
93 * UTF_DECOMPOSED: generate fully decomposed output
95 * UTF_ADD_NULL_TERM: add NULL termination to UTF-8 output
98 * ENAMETOOLONG: Name didn't fit; only buflen bytes were encoded
100 * EINVAL: Illegal char found; char was replaced by an '_'.
/*
 * utf8_encodestr - encode a Unicode string to UTF-8.
 *
 * ucsp      Unicode (UCS-2) input string.
 * ucslen    Length of the input string (in bytes, as for utf8_encodelen --
 *           TODO confirm against the implementation).
 * utf8p     Destination buffer for the UTF-8 output.
 * utf8len   Out parameter; presumably receives the number of UTF-8 bytes
 *           produced -- TODO confirm against the implementation.
 * buflen    Capacity of the destination buffer, in bytes.
 * altslash  Alternate (replacement) character; must be provided if '/'
 *           characters are allowed on disk.
 * flags     UTF_* flags: UTF_REVERSE_ENDIAN, UTF_BIG_ENDIAN,
 *           UTF_LITTLE_ENDIAN, UTF_DECOMPOSED, UTF_ADD_NULL_TERM.
 *
 * Documented error codes:
 *   ENAMETOOLONG  Name didn't fit; only buflen bytes were encoded.
 *   EINVAL        Illegal char found; char was replaced by an '_'.
 */
int utf8_encodestr(const u_int16_t *ucsp, size_t ucslen,
                   u_int8_t *utf8p, size_t *utf8len, size_t buflen,
                   u_int16_t altslash, int flags);
105 * utf8_decodestr - Decodes a UTF-8 string back to Unicode
108 * The input UTF-8 string does not need to be null terminated
111 * If '/' chars are allowed on disk then an alternate
112 * (replacement) char must be provided in altslash.
115 * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime
117 * UTF_BIG_ENDIAN: Unicode byte order is always big endian
119 * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian
121 * UTF_DECOMPOSED: generate fully decomposed output (NFD)
123 * UTF_PRECOMPOSED: generate precomposed output (NFC)
125 * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input
128 * ENAMETOOLONG: Name didn't fit; only ucslen chars were decoded.
130 * EINVAL: Illegal UTF-8 sequence found.
/*
 * utf8_decodestr - decode a UTF-8 string back to Unicode.
 *
 * utf8p     UTF-8 input string; it does not need to be null terminated.
 * utf8len   Length of the UTF-8 input (presumably in bytes -- TODO confirm).
 * ucsp      Destination buffer for the Unicode (UCS-2) output.
 * ucslen    Out parameter; presumably receives the size of the decoded
 *           output -- TODO confirm units against the implementation.
 * buflen    Capacity of the destination buffer.
 * altslash  Alternate (replacement) character; must be provided if '/'
 *           characters are allowed on disk.
 * flags     UTF_* flags: UTF_REVERSE_ENDIAN, UTF_BIG_ENDIAN,
 *           UTF_LITTLE_ENDIAN, UTF_DECOMPOSED (NFD output),
 *           UTF_PRECOMPOSED (NFC output), UTF_ESCAPE_ILLEGAL
 *           (percent escape any illegal UTF-8 input).
 *
 * Documented error codes:
 *   ENAMETOOLONG  Name didn't fit; only ucslen chars were decoded.
 *   EINVAL        Illegal UTF-8 sequence found.
 */
int utf8_decodestr(const u_int8_t *utf8p, size_t utf8len,
                   u_int16_t *ucsp, size_t *ucslen, size_t buflen,
                   u_int16_t altslash, int flags);
134 #endif /* lf_hfs_sbunicode_h */