xnu-1228.0.2.tar.gz

[apple/xnu.git] / bsd / sys / utfconv.h
diff --git a/bsd/sys/utfconv.h b/bsd/sys/utfconv.h

index 1af22e908d6572e1f82726fdcd21d3e15cd0fc71..bff9d066733b6939ac2de4d45b26b49ca669884c 100644 (file)
--- a/bsd/sys/utfconv.h
+++ b/bsd/sys/utfconv.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
   *
   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   * 
@@ -34,24 +34,159 @@
  
  #ifdef KERNEL
  #ifdef __APPLE_API_UNSTABLE
+
  /*
   * UTF-8 encode/decode flags
   */
-#define        UTF_REVERSE_ENDIAN      0x01    /* reverse UCS-2 byte order */
-#define UTF_NO_NULL_TERM       0x02    /* do not add null termination */
-#define        UTF_DECOMPOSED          0x04    /* generate fully decomposed UCS-2 */
-#define        UTF_PRECOMPOSED         0x08    /* generate precomposed UCS-2 */
+#define        UTF_REVERSE_ENDIAN   0x0001   /* reverse UCS-2 byte order */
+#define UTF_NO_NULL_TERM     0x0002   /* do not add null termination */
+#define        UTF_DECOMPOSED       0x0004   /* generate fully decomposed UCS-2 */
+#define        UTF_PRECOMPOSED      0x0008   /* generate precomposed UCS-2 */
+#define UTF_ESCAPE_ILLEGAL   0x0010   /* escape illegal UTF-8 */
+#define UTF_SFM_CONVERSIONS  0x0020   /* Use SFM mappings for illegal NTFS chars */
+
+#define UTF_BIG_ENDIAN       \
+        ((BYTE_ORDER == BIG_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN)
+
+#define UTF_LITTLE_ENDIAN    \
+        ((BYTE_ORDER == LITTLE_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN)
  
  __BEGIN_DECLS
-size_t utf8_encodelen(const u_int16_t *, size_t, u_int16_t, int);
  
-int    utf8_encodestr(const u_int16_t *, size_t, u_int8_t *, size_t *,
-               size_t, u_int16_t, int);
+/*
+ * utf8_encodelen - Calculate the UTF-8 encoding length
+ *
+ * This function takes a Unicode input string, ucsp, of ucslen bytes
+ * and calculates the size of the UTF-8 output in bytes (not including
+ * a NULL termination byte). The string must reside in kernel memory.
+ *
+ * FLAGS
+ *    UTF_REVERSE_ENDIAN:  Unicode byte order is opposite current runtime
+ *
+ *    UTF_BIG_ENDIAN:  Unicode byte order is always big endian
+ *
+ *    UTF_LITTLE_ENDIAN:  Unicode byte order is always little endian
+ *
+ *    UTF_DECOMPOSED:  assume fully decomposed output
+ *
+ * ERRORS
+ *    None
+ */
+size_t
+utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash,
+               int flags);
+
+
+/*
+ * utf8_encodestr - Encodes a Unicode string into UTF-8
+ *
+ * This function takes a Unicode input string, ucsp, of ucslen bytes
+ * and produces the UTF-8 output into a buffer of buflen bytes pointed
+ * to by utf8p. The size of the output in bytes (not including a NULL
+ * termination byte) is returned in utf8len. The UTF-8 string output
+ * is NULL terminated. Both buffers must reside in kernel memory.
+ *
+ * If '/' chars are possible in the Unicode input then an alternate
+ * (replacement) char must be provided in altslash.
+ *
+ * FLAGS
+ *    UTF_REVERSE_ENDIAN:  Unicode byte order is opposite current runtime
+ *
+ *    UTF_BIG_ENDIAN:  Unicode byte order is always big endian
+ *
+ *    UTF_LITTLE_ENDIAN:  Unicode byte order is always little endian
+ *
+ *    UTF_NO_NULL_TERM:  do not add null termination to output string
+ *
+ *    UTF_DECOMPOSED:  generate fully decomposed output
+ *
+ * ERRORS
+ *    ENAMETOOLONG:  output did not fit; only utf8len bytes were encoded
+ *
+ *    EINVAL:  illegal Unicode char encountered
+ */
+int
+utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
+               size_t * utf8len, size_t buflen, u_int16_t altslash, int flags);
+
+
+/*
+ * utf8_decodestr - Decodes a UTF-8 string into Unicode
+ *
+ * This function takes a UTF-8 input string, utf8p, of utf8len bytes
+ * and produces the Unicode output into a buffer of buflen bytes pointed
+ * to by ucsp. The size of the output in bytes (not including a NULL
+ * termination byte) is returned in ucslen. Both buffers must reside
+ * in kernel memory.
+ *
+ * If '/' chars are allowed in the Unicode output then an alternate
+ * (replacement) char must be provided in altslash.
+ *
+ * FLAGS
+ *    UTF_REVERSE_ENDIAN:  Unicode byte order is opposite current runtime
+ *
+ *    UTF_BIG_ENDIAN:  Unicode byte order is always big endian
+ *
+ *    UTF_LITTLE_ENDIAN:  Unicode byte order is always little endian
+ *
+ *    UTF_DECOMPOSED:  generate fully decomposed output (NFD)
+ *
+ *    UTF_PRECOMPOSED:  generate precomposed output (NFC)
+ *
+ *    UTF_ESCAPE_ILLEGAL:  percent escape any illegal UTF-8 input
+ *
+ * ERRORS
+ *    ENAMETOOLONG:  output did not fit; only ucslen bytes were decoded.
+ *
+ *    EINVAL:  illegal UTF-8 sequence encountered.
+ */
+int
+utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
+               size_t *ucslen, size_t buflen, u_int16_t altslash, int flags);
+
  
-int    utf8_decodestr(const u_int8_t *, size_t, u_int16_t *,size_t *,
-               size_t, u_int16_t, int);
+/*
+ * utf8_normalizestr - Normalize a UTF-8 string (NFC or NFD)
+ *
+ * This function takes a UTF-8 input string, instr, of inlen bytes
+ * and produces normalized UTF-8 output into a buffer of buflen bytes
+ * pointed to by outstr. The size of the output in bytes (not including
+ * a NULL termination byte) is returned in outlen. In-place conversions
+ * are not supported (i.e. instr != outstr).  Both buffers must reside
+ * in kernel memory.
+ *
+ * FLAGS
+ *    UTF_DECOMPOSED:  output string will be fully decomposed (NFD)
+ *
+ *    UTF_PRECOMPOSED:  output string will be precomposed (NFC)
+ *
+ *    UTF_NO_NULL_TERM:  do not add null termination to output string
+ *
+ *    UTF_ESCAPE_ILLEGAL:  percent escape any illegal UTF-8 input
+ *
+ * ERRORS
+ *    ENAMETOOLONG:  output did not fit or input exceeded MAXPATHLEN bytes
+ *
+ *    EINVAL:  illegal UTF-8 sequence encountered or invalid flags
+ */
+int
+utf8_normalizestr(const u_int8_t* instr, size_t inlen, u_int8_t* outstr,
+                  size_t *outlen, size_t buflen, int flags);
+
+
+/*
+ * utf8_validatestr - validates a UTF-8 string
+ *
+ * This function takes a UTF-8 input string, utf8p, of utf8len bytes
+ * and determines if it is valid UTF-8.  The string must reside in kernel
+ * memory.
+ *
+ * ERRORS
+ *    EINVAL:  illegal UTF-8 sequence encountered.
+ */
+int
+utf8_validatestr(const u_int8_t* utf8p, size_t utf8len);
  
-int    utf8_validatestr(const u_int8_t*, size_t);
  
  __END_DECLS