JavaScriptCore-7601.1.46.3.tar.gz

[apple/javascriptcore.git] / icu / unicode / utf.h
diff --git a/icu/unicode/utf.h b/icu/unicode/utf.h

index 201691d09176720417f469370ed628a774b7b04e..f79479935d4a1d6b1befcd405cd04f52381b93e7 100644 (file)
--- a/icu/unicode/utf.h
+++ b/icu/unicode/utf.h
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 1999-2004, International Business Machines
+*   Copyright (C) 1999-2010, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -63,6 +63,14 @@
   * malformed sequences can be expressed unambiguously with a distinct subrange
   * of Unicode code points.)
   *
+ * The regular "safe" macros require that the initial, passed-in string index
+ * is within bounds. They only check the index when they read more than one
+ * code unit. This is usually done with code similar to the following loop:
+ * <pre>while(i<length) {
+ *   U16_NEXT(s, i, length, c);
+ *   // use c
+ * }</pre>
+ *
   * When it is safe to assume that text is well-formed UTF-16
   * (does not contain single, unpaired surrogates), then one can use
   * U16_..._UNSAFE macros.
@@ -80,12 +88,14 @@
   * The unsafe UTF-8 macros are entirely implemented inside the macro definitions
   * and are fast, while the safe UTF-8 macros call functions for all but the
   * trivial (ASCII) cases.
+ * (ICU 3.6 optimizes U8_NEXT() and U8_APPEND() to handle most other common
+ * characters inline as well.)
   *
   * Unlike with UTF-16, malformed sequences cannot be expressed with distinct
   * code point values (0..U+10ffff). They are indicated with negative values instead.
   *
   * For more information see the ICU User Guide Strings chapter
- * (http://oss.software.ibm.com/icu/userguide/).
+ * (http://icu-project.org/userguide/strings.html).
   *
   * <em>Usage:</em>
   * ICU coding guidelines for if() statements should be followed when using these macros.
@@ -157,13 +167,11 @@
           (uint32_t)(c)<=0x10ffff && \
           !U_IS_UNICODE_NONCHAR(c)))
  
-#ifndef U_HIDE_DRAFT_API
-
  /**
   * Is this code point a BMP code point (U+0000..U+ffff)?
   * @param c 32-bit code point
   * @return TRUE or FALSE
- * @draft ICU 2.8
+ * @stable ICU 2.8
   */
  #define U_IS_BMP(c) ((uint32_t)(c)<=0xffff)
  
@@ -171,11 +179,9 @@
   * Is this code point a supplementary code point (U+10000..U+10ffff)?
   * @param c 32-bit code point
   * @return TRUE or FALSE
- * @draft ICU 2.8
+ * @stable ICU 2.8
   */
  #define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff)
-
-#endif /*U_HIDE_DRAFT_API*/
   
  /**
   * Is this code point a lead surrogate (U+d800..U+dbff)?
@@ -210,6 +216,15 @@
   */
  #define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
  
+/**
+ * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
+ * is it a trail surrogate?
+ * @param c 32-bit code point
+ * @return TRUE or FALSE
+ * @stable ICU 4.2
+ */
+#define U_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
+
  /* include the utfXX.h ------------------------------------------------------ */
  
  #include "unicode/utf8.h"