X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/8f6c56a50524aa785f7e596d52dddfb331e18961..36401178fd6817c043cc00b0c00c7f723e58efae:/bsd/sys/utfconv.h diff --git a/bsd/sys/utfconv.h b/bsd/sys/utfconv.h index 1af22e908..bff9d0667 100644 --- a/bsd/sys/utfconv.h +++ b/bsd/sys/utfconv.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -34,24 +34,159 @@ #ifdef KERNEL #ifdef __APPLE_API_UNSTABLE + /* * UTF-8 encode/decode flags */ -#define UTF_REVERSE_ENDIAN 0x01 /* reverse UCS-2 byte order */ -#define UTF_NO_NULL_TERM 0x02 /* do not add null termination */ -#define UTF_DECOMPOSED 0x04 /* generate fully decomposed UCS-2 */ -#define UTF_PRECOMPOSED 0x08 /* generate precomposed UCS-2 */ +#define UTF_REVERSE_ENDIAN 0x0001 /* reverse UCS-2 byte order */ +#define UTF_NO_NULL_TERM 0x0002 /* do not add null termination */ +#define UTF_DECOMPOSED 0x0004 /* generate fully decomposed UCS-2 */ +#define UTF_PRECOMPOSED 0x0008 /* generate precomposed UCS-2 */ +#define UTF_ESCAPE_ILLEGAL 0x0010 /* escape illegal UTF-8 */ +#define UTF_SFM_CONVERSIONS 0x0020 /* Use SFM mappings for illegal NTFS chars */ + +#define UTF_BIG_ENDIAN \ + ((BYTE_ORDER == BIG_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) + +#define UTF_LITTLE_ENDIAN \ + ((BYTE_ORDER == LITTLE_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) __BEGIN_DECLS -size_t utf8_encodelen(const u_int16_t *, size_t, u_int16_t, int); -int utf8_encodestr(const u_int16_t *, size_t, u_int8_t *, size_t *, - size_t, u_int16_t, int); +/* + * utf8_encodelen - Calculate the UTF-8 encoding length + * + * This function takes a Unicode input string, ucsp, of ucslen bytes + * and calculates the size of the UTF-8 output in bytes (not including + * a NULL termination byte). The string must reside in kernel memory. 
+ * + * FLAGS + * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime + * + * UTF_BIG_ENDIAN: Unicode byte order is always big endian + * + * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian + * + * UTF_DECOMPOSED: assume fully decomposed output + * + * ERRORS + * None + */ +size_t +utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash, + int flags); + + +/* + * utf8_encodestr - Encodes a Unicode string into UTF-8 + * + * This function takes a Unicode input string, ucsp, of ucslen bytes + * and produces the UTF-8 output into a buffer of buflen bytes pointed + * to by utf8p. The size of the output in bytes (not including a NULL + * termination byte) is returned in utf8len. The UTF-8 string output + * is NULL terminated unless UTF_NO_NULL_TERM is set. Both buffers must reside in kernel memory. + * + * If '/' chars are possible in the Unicode input then an alternate + * (replacement) char must be provided in altslash. + * + * FLAGS + * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime + * + * UTF_BIG_ENDIAN: Unicode byte order is always big endian + * + * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian + * + * UTF_NO_NULL_TERM: do not add null termination to output string + * + * UTF_DECOMPOSED: generate fully decomposed output + * + * ERRORS + * ENAMETOOLONG: output did not fit; only utf8len bytes were encoded + * + * EINVAL: illegal Unicode char encountered + */ +int +utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, + size_t * utf8len, size_t buflen, u_int16_t altslash, int flags); + + +/* + * utf8_decodestr - Decodes a UTF-8 string into Unicode + * + * This function takes a UTF-8 input string, utf8p, of utf8len bytes + * and produces the Unicode output into a buffer of buflen bytes pointed + * to by ucsp. The size of the output in bytes (not including a NULL + * termination byte) is returned in ucslen. Both buffers must reside + * in kernel memory. 
+ * + * If '/' chars are allowed in the Unicode output then an alternate + * (replacement) char must be provided in altslash. + * + * FLAGS + * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime + * + * UTF_BIG_ENDIAN: Unicode byte order is always big endian + * + * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian + * + * UTF_DECOMPOSED: generate fully decomposed output (NFD) + * + * UTF_PRECOMPOSED: generate precomposed output (NFC) + * + * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input + * + * ERRORS + * ENAMETOOLONG: output did not fit; only ucslen bytes were decoded. + * + * EINVAL: illegal UTF-8 sequence encountered. + */ +int +utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, + size_t *ucslen, size_t buflen, u_int16_t altslash, int flags); + -int utf8_decodestr(const u_int8_t *, size_t, u_int16_t *,size_t *, - size_t, u_int16_t, int); +/* + * utf8_normalizestr - Normalize a UTF-8 string (NFC or NFD) + * + * This function takes a UTF-8 input string, instr, of inlen bytes + * and produces normalized UTF-8 output into a buffer of buflen bytes + * pointed to by outstr. The size of the output in bytes (not including + * a NULL termination byte) is returned in outlen. In-place conversions + * are not supported (i.e. instr != outstr). Both buffers must reside + * in kernel memory. 
+ * + * FLAGS + * UTF_DECOMPOSED: output string will be fully decomposed (NFD) + * + * UTF_PRECOMPOSED: output string will be precomposed (NFC) + * + * UTF_NO_NULL_TERM: do not add null termination to output string + * + * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input + * + * ERRORS + * ENAMETOOLONG: output did not fit or input exceeded MAXPATHLEN bytes + * + * EINVAL: illegal UTF-8 sequence encountered or invalid flags + */ +int +utf8_normalizestr(const u_int8_t* instr, size_t inlen, u_int8_t* outstr, + size_t *outlen, size_t buflen, int flags); + + +/* + * utf8_validatestr - validates a UTF-8 string + * + * This function takes a UTF-8 input string, utf8p, of utf8len bytes + * and determines if it is valid UTF-8. The string must reside in kernel + * memory. + * + * ERRORS + * EINVAL: illegal UTF-8 sequence encountered. + */ +int +utf8_validatestr(const u_int8_t* utf8p, size_t utf8len); -int utf8_validatestr(const u_int8_t*, size_t); __END_DECLS