]>
git.saurik.com Git - apple/xnu.git/blob - bsd/sys/utfconv.h
   2  * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 
   6  * This file contains Original Code and/or Modifications of Original Code 
   7  * as defined in and that are subject to the Apple Public Source License 
   8  * Version 2.0 (the 'License'). You may not use this file except in 
   9  * compliance with the License. The rights granted to you under the License 
  10  * may not be used to create, or enable the creation or redistribution of, 
  11  * unlawful or unlicensed copies of an Apple operating system, or to 
  12  * circumvent, violate, or enable the circumvention or violation of, any 
  13  * terms of an Apple operating system software license agreement. 
  15  * Please obtain a copy of the License at 
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file. 
  18  * The Original Code and all software distributed under the License are 
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  23  * Please see the License for the specific language governing rights and 
  24  * limitations under the License. 
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 
  29 #ifndef _SYS_UTFCONV_H_ 
  30 #define _SYS_UTFCONV_H_ 
  32 #include <sys/appleapiopts.h> 
  33 #include <sys/cdefs.h>  
  36 #ifdef __APPLE_API_UNSTABLE 
  39  * UTF-8 encode/decode flags 
  41 #define UTF_REVERSE_ENDIAN   0x0001   /* Unicode (UCS-2) byte order is opposite the current runtime */ 
  42 #define UTF_NO_NULL_TERM     0x0002   /* do not add null termination to the output string */ 
  43 #define UTF_DECOMPOSED       0x0004   /* generate fully decomposed UCS-2 (NFD) */ 
  44 #define UTF_PRECOMPOSED      0x0008   /* generate precomposed UCS-2 (NFC) */ 
  45 #define UTF_ESCAPE_ILLEGAL   0x0010   /* percent escape any illegal UTF-8 input */ 
  46 #define UTF_SFM_CONVERSIONS  0x0020   /* use SFM mappings for illegal NTFS chars */ 
     /* Evaluates to 0 when BYTE_ORDER is BIG_ENDIAN, otherwise to UTF_REVERSE_ENDIAN. */
  48 #define UTF_BIG_ENDIAN       \ 
  49         ((BYTE_ORDER == BIG_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) 
     /* Evaluates to 0 when BYTE_ORDER is LITTLE_ENDIAN, otherwise to UTF_REVERSE_ENDIAN. */
  51 #define UTF_LITTLE_ENDIAN    \ 
  52         ((BYTE_ORDER == LITTLE_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) 
  57  * utf8_encodelen - Calculate the UTF-8 encoding length 
  59  * This function takes a Unicode input string, ucsp, of ucslen bytes 
  60  * and calculates the size of the UTF-8 output in bytes (not including 
  61  * a NULL termination byte). The string must reside in kernel memory. 
  64  *    UTF_REVERSE_ENDIAN:  Unicode byte order is opposite current runtime 
  66  *    UTF_BIG_ENDIAN:  Unicode byte order is always big endian 
  68  *    UTF_LITTLE_ENDIAN:  Unicode byte order is always little endian 
  70  *    UTF_DECOMPOSED:  assume fully decomposed output 
  76 utf8_encodelen(const u_int16_t 
* ucsp
, size_t ucslen
, u_int16_t altslash
, 
  81  * utf8_encodestr - Encodes a Unicode string into UTF-8 
  83  * This function takes a Unicode input string, ucsp, of ucslen bytes 
  84  * and produces the UTF-8 output into a buffer of buflen bytes pointed 
  85  * to by utf8p. The size of the output in bytes (not including a NULL 
  86  * termination byte) is returned in utf8len. The UTF-8 string output 
  87  * is NULL terminated unless UTF_NO_NULL_TERM is set. Both buffers must reside in kernel memory. 
  89  * If '/' chars are possible in the Unicode input then an alternate 
  90  * (replacement) char must be provided in altslash. 
  93  *    UTF_REVERSE_ENDIAN:  Unicode byte order is opposite current runtime 
  95  *    UTF_BIG_ENDIAN:  Unicode byte order is always big endian 
  97  *    UTF_LITTLE_ENDIAN:  Unicode byte order is always little endian 
  99  *    UTF_NO_NULL_TERM:  do not add null termination to output string 
 101  *    UTF_DECOMPOSED:  generate fully decomposed output 
 104  *    ENAMETOOLONG:  output did not fit; only utf8len bytes were encoded 
 106  *    EINVAL:  illegal Unicode char encountered 
 109 utf8_encodestr(const u_int16_t 
* ucsp
, size_t ucslen
, u_int8_t 
* utf8p
, 
 110                size_t * utf8len
, size_t buflen
, u_int16_t altslash
, int flags
); 
 114  * utf8_decodestr - Decodes a UTF-8 string into Unicode 
 116  * This function takes a UTF-8 input string, utf8p, of utf8len bytes 
 117  * and produces the Unicode output into a buffer of buflen bytes pointed 
 118  * to by ucsp. The size of the output in bytes (not including a NULL 
 119  * termination byte) is returned in ucslen. Both buffers must reside 
 122  * If '/' chars are allowed in the Unicode output then an alternate 
 123  * (replacement) char must be provided in altslash. 
 126  *    UTF_REVERSE_ENDIAN:  Unicode byte order is opposite current runtime 
 128  *    UTF_BIG_ENDIAN:  Unicode byte order is always big endian 
 130  *    UTF_LITTLE_ENDIAN:  Unicode byte order is always little endian 
 132  *    UTF_DECOMPOSED:  generate fully decomposed output (NFD) 
 134  *    UTF_PRECOMPOSED:  generate precomposed output (NFC) 
 136  *    UTF_ESCAPE_ILLEGAL:  percent escape any illegal UTF-8 input 
 139  *    ENAMETOOLONG:  output did not fit; only ucslen bytes were decoded. 
 141  *    EINVAL:  illegal UTF-8 sequence encountered. 
 144 utf8_decodestr(const u_int8_t
* utf8p
, size_t utf8len
, u_int16_t
* ucsp
, 
 145                size_t *ucslen
, size_t buflen
, u_int16_t altslash
, int flags
); 
 149  * utf8_normalizestr - Normalize a UTF-8 string (NFC or NFD) 
 151  * This function takes a UTF-8 input string, instr, of inlen bytes 
 152  * and produces normalized UTF-8 output into a buffer of buflen bytes 
 153  * pointed to by outstr. The size of the output in bytes (not including 
 154  * a NULL termination byte) is returned in outlen. In-place conversions 
 155  * are not supported (i.e. instr != outstr).  Both buffers must reside 
 159  *    UTF_DECOMPOSED:  output string will be fully decomposed (NFD) 
 161  *    UTF_PRECOMPOSED:  output string will be precomposed (NFC) 
 163  *    UTF_NO_NULL_TERM:  do not add null termination to output string 
 165  *    UTF_ESCAPE_ILLEGAL:  percent escape any illegal UTF-8 input 
 168  *    ENAMETOOLONG:  output did not fit or input exceeded MAXPATHLEN bytes 
 170  *    EINVAL:  illegal UTF-8 sequence encountered or invalid flags 
 173 utf8_normalizestr(const u_int8_t
* instr
, size_t inlen
, u_int8_t
* outstr
, 
 174                   size_t *outlen
, size_t buflen
, int flags
); 
 178  * utf8_validatestr - validates a UTF-8 string 
 180  * This function takes a UTF-8 input string, utf8p, of utf8len bytes 
 181  * and determines whether it is valid UTF-8.  The string must reside in kernel 
 185  *    EINVAL:  illegal UTF-8 sequence encountered. 
 188 utf8_validatestr(const u_int8_t
* utf8p
, size_t utf8len
); 
 193 #endif /* __APPLE_API_UNSTABLE */ 
 196 #endif /* !_SYS_UTFCONV_H_ */