ICU-400.42.tar.gz

[apple/icu.git] / icuSources / common / unicode / unistr.h
diff --git a/icuSources/common/unicode/unistr.h b/icuSources/common/unicode/unistr.h

index 3cc6340465990ef50c9f5f8143133f6d0bb59318..9a96bdc27662374685ce76468f639b1e86ed78a6 100644 (file)
--- a/icuSources/common/unicode/unistr.h
+++ b/icuSources/common/unicode/unistr.h
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 1998-2003, International Business Machines
+*   Copyright (C) 1998-2008, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *
@@ -21,8 +21,11 @@
  #ifndef UNISTR_H
  #define UNISTR_H
  
-#include "unicode/utypes.h"
-#include "unicode/uobject.h"
+/**
+ * \file 
+ * \brief C++ API: Unicode String 
+ */
+
  #include "unicode/rep.h"
  
  struct UConverter;          // unicode/ucnv.h
@@ -33,26 +36,39 @@ class  StringThreadTest;
  /**
   * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
   * Compare strings in code point order instead of code unit order.
- * @draft ICU 2.2
+ * @stable ICU 2.2
   */
  #define U_COMPARE_CODE_POINT_ORDER  0x8000
  #endif
  
  #ifndef USTRING_H
-/* see ustring.h */
-U_CAPI int32_t U_EXPORT2
+/**
+ * \ingroup ustring_ustrlen
+ */
+U_STABLE int32_t U_EXPORT2
  u_strlen(const UChar *s);
  #endif
  
  U_NAMESPACE_BEGIN
  
  class Locale;               // unicode/locid.h
-class UCharReference;
  class StringCharacterIterator;
  class BreakIterator;        // unicode/brkiter.h
  
  /* The <iostream> include has been moved to unicode/ustream.h */
  
+/**
+ * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+ * which constructs a Unicode string from an invariant-character char * string.
+ * About invariant characters see utypes.h.
+ * This constructor has no runtime dependency on conversion code and is
+ * therefore recommended over ones taking a charset name string
+ * (where the empty string "" indicates invariant-character conversion).
+ *
+ * @stable ICU 3.2
+ */
+#define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
+
  /**
   * Unicode String literals in C++.
   * Dependent on the platform properties, different UnicodeString
@@ -70,12 +86,14 @@ class BreakIterator;        // unicode/brkiter.h
   * such string variable before it is used.
   * @stable ICU 2.0
   */
-#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
-#   define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)L ## cs, _length)
+#if defined(U_DECLARE_UTF16)
+#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
+#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
  #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
-#   define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)cs, _length)
+#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
  #else
-#   define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, "")
+#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
  #endif
  
  /**
@@ -91,13 +109,7 @@ class BreakIterator;        // unicode/brkiter.h
   * The string parameter must be a C string literal.
   * @stable ICU 2.0
   */
-#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
-#   define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)L ## cs, -1)
-#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
-#   define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)cs, -1)
-#else
-#   define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, "")
-#endif
+#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
  
  /**
   * UnicodeString is a string class that stores Unicode characters directly and provides
@@ -107,13 +119,12 @@ class BreakIterator;        // unicode/brkiter.h
   * The UnicodeString class is not suitable for subclassing.
   *
   * <p>For an overview of Unicode strings in C and C++ see the
- * <a href="http://oss.software.ibm.com/icu/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
   *
   * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
- * A Unicode character may be stored with either
- * one code unit &mdash; which is the most common case &mdash; or with a matched pair of
- * special code units ("surrogates").
- * The data type for code units is UChar.<br>
+ * A Unicode character may be stored with either one code unit
+ * (the most common case) or with a matched pair of special code units
+ * ("surrogates"). The data type for code units is UChar. 
   * For single-character handling, a Unicode character code <em>point</em> is a value
   * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
   *
@@ -163,7 +174,7 @@ class BreakIterator;        // unicode/brkiter.h
   * significant performance improvements.
   * Also, the internal buffer is accessible via special functions.
   * For details see the
- * <a href="http://oss.software.ibm.com/icu/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
   *
   * @see utf.h
   * @see CharacterIterator
@@ -173,6 +184,22 @@ class U_COMMON_API UnicodeString : public Replaceable
  {
  public:
  
+  /**
+   * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+   * which constructs a Unicode string from an invariant-character char * string.
+   * Use the macro US_INV instead of the full qualification for this value.
+   *
+   * @see US_INV
+   * @stable ICU 3.2
+   */
+  enum EInvariant {
+    /**
+     * @see EInvariant
+     * @stable ICU 3.2
+     */
+    kInvariant
+  };
+
    //========================================
    // Read-only operations
    //========================================
@@ -200,8 +227,8 @@ public:
    /**
     * Greater than operator. Performs only bitwise comparison.
     * @param text The UnicodeString to compare to this one.
-   * @return TRUE if the characters in <TT>text</TT> are bitwise
-   * greater than the characters in this, FALSE otherwise
+   * @return TRUE if the characters in this are bitwise
+   * greater than the characters in <code>text</code>, FALSE otherwise
     * @stable ICU 2.0
     */
    inline UBool operator> (const UnicodeString& text) const;
@@ -209,8 +236,8 @@ public:
    /**
     * Less than operator. Performs only bitwise comparison.
     * @param text The UnicodeString to compare to this one.
-   * @return TRUE if the characters in <TT>text</TT> are bitwise
-   * less than the characters in this, FALSE otherwise
+   * @return TRUE if the characters in this are bitwise
+   * less than the characters in <code>text</code>, FALSE otherwise
     * @stable ICU 2.0
     */
    inline UBool operator< (const UnicodeString& text) const;
@@ -218,8 +245,8 @@ public:
    /**
     * Greater than or equal operator. Performs only bitwise comparison.
     * @param text The UnicodeString to compare to this one.
-   * @return TRUE if the characters in <TT>text</TT> are bitwise
-   * greater than or equal to the characters in this, FALSE otherwise
+   * @return TRUE if the characters in this are bitwise
+   * greater than or equal to the characters in <code>text</code>, FALSE otherwise
     * @stable ICU 2.0
     */
    inline UBool operator>= (const UnicodeString& text) const;
@@ -227,58 +254,58 @@ public:
    /**
     * Less than or equal operator. Performs only bitwise comparison.
     * @param text The UnicodeString to compare to this one.
-   * @return TRUE if the characters in <TT>text</TT> are bitwise
-   * less than or equal to the characters in this, FALSE otherwise
+   * @return TRUE if the characters in this are bitwise
+   * less than or equal to the characters in <code>text</code>, FALSE otherwise
     * @stable ICU 2.0
     */
    inline UBool operator<= (const UnicodeString& text) const;
  
    /**
     * Compare the characters bitwise in this UnicodeString to
-   * the characters in <TT>text</TT>.
+   * the characters in <code>text</code>.
     * @param text The UnicodeString to compare to this one.
-   * @return The result of bitwise character comparison: 0 if <TT>text</TT>
-   * contains the same characters as this, -1 if the characters in 
-   * <TT>text</TT> are bitwise less than the characters in this, +1 if the
-   * characters in <TT>text</TT> are bitwise greater than the characters 
-   * in this.
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>text</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>text</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>text</code>.
     * @stable ICU 2.0
     */
    inline int8_t compare(const UnicodeString& text) const;
  
    /**
-   * Compare the characters bitwise in the range 
-   * [<TT>start</TT>, <TT>start + length</TT>) with the characters 
-   * in <TT>srcText</TT>
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+   * in <TT>text</TT>
     * @param start the offset at which the compare operation begins
     * @param length the number of characters of text to compare.
-   * @param srcText the text to be compared
-   * @return The result of bitwise character comparison: 0 if <TT>text</TT>
-   * contains the same characters as this, -1 if the characters in 
-   * <TT>text</TT> are bitwise less than the characters in this, +1 if the
-   * characters in <TT>text</TT> are bitwise greater than the characters 
-   * in this.
+   * @param text the other text to be compared against this string.
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>text</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>text</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>text</code>.
     * @stable ICU 2.0
     */
    inline int8_t compare(int32_t start,
           int32_t length,
-         const UnicodeString& srcText) const;
+         const UnicodeString& text) const;
  
    /**
-   * Compare the characters bitwise in the range 
-   * [<TT>start</TT>, <TT>start + length</TT>) with the characters 
-   * in <TT>srcText</TT> in the range 
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).  
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * @param start the offset at which the compare operation begins
     * @param length the number of characters in this to compare.
     * @param srcText the text to be compared
     * @param srcStart the offset into <TT>srcText</TT> to start comparison
     * @param srcLength the number of characters in <TT>src</TT> to compare
-   * @return The result of bitwise character comparison: 0 if <TT>text</TT>
-   * contains the same characters as this, -1 if the characters in 
-   * <TT>text</TT> are bitwise less than the characters in this, +1 if the
-   * characters in <TT>text</TT> are bitwise greater than the characters 
-   * in this.
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcText</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcText</code>.
     * @stable ICU 2.0
     */
     inline int8_t compare(int32_t start,
@@ -288,32 +315,32 @@ public:
           int32_t srcLength) const;
  
    /**
-   * Compare the characters bitwise in this UnicodeString with the first 
+   * Compare the characters bitwise in this UnicodeString with the first
     * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
     * @param srcChars The characters to compare to this UnicodeString.
     * @param srcLength the number of characters in <TT>srcChars</TT> to compare
-   * @return The result of bitwise character comparison: 0 if <TT>text</TT>
-   * contains the same characters as this, -1 if the characters in 
-   * <TT>text</TT> are bitwise less than the characters in this, +1 if the
-   * characters in <TT>text</TT> are bitwise greater than the characters 
-   * in this.
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcChars</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcChars</code>.
     * @stable ICU 2.0
     */
    inline int8_t compare(const UChar *srcChars,
           int32_t srcLength) const;
  
    /**
-   * Compare the characters bitwise in the range 
-   * [<TT>start</TT>, <TT>start + length</TT>) with the first 
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the first
     * <TT>length</TT> characters in <TT>srcChars</TT>
     * @param start the offset at which the compare operation begins
     * @param length the number of characters to compare.
     * @param srcChars the characters to be compared
-   * @return The result of bitwise character comparison: 0 if <TT>text</TT>
-   * contains the same characters as this, -1 if the characters in 
-   * <TT>text</TT> are bitwise less than the characters in this, +1 if the
-   * characters in <TT>text</TT> are bitwise greater than the characters 
-   * in this.
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcChars</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcChars</code>.
     * @stable ICU 2.0
     */
    inline int8_t compare(int32_t start,
@@ -321,20 +348,20 @@ public:
           const UChar *srcChars) const;
  
    /**
-   * Compare the characters bitwise in the range 
-   * [<TT>start</TT>, <TT>start + length</TT>) with the characters 
-   * in <TT>srcChars</TT> in the range 
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).  
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+   * in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * @param start the offset at which the compare operation begins
     * @param length the number of characters in this to compare
     * @param srcChars the characters to be compared
     * @param srcStart the offset into <TT>srcChars</TT> to start comparison
     * @param srcLength the number of characters in <TT>srcChars</TT> to compare
-   * @return The result of bitwise character comparison: 0 if <TT>text</TT>
-   * contains the same characters as this, -1 if the characters in 
-   * <TT>text</TT> are bitwise less than the characters in this, +1 if the
-   * characters in <TT>text</TT> are bitwise greater than the characters 
-   * in this.
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcChars</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcChars</code>.
     * @stable ICU 2.0
     */
    inline int8_t compare(int32_t start,
@@ -344,20 +371,20 @@ public:
           int32_t srcLength) const;
  
    /**
-   * Compare the characters bitwise in the range 
-   * [<TT>start</TT>, <TT>limit</TT>) with the characters 
-   * in <TT>srcText</TT> in the range 
-   * [<TT>srcStart</TT>, <TT>srcLimit</TT>).  
+   * Compare the characters bitwise in the range
+   * [<TT>start</TT>, <TT>limit</TT>) with the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
     * @param start the offset at which the compare operation begins
     * @param limit the offset immediately following the compare operation
     * @param srcText the text to be compared
     * @param srcStart the offset into <TT>srcText</TT> to start comparison
     * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
-   * @return The result of bitwise character comparison: 0 if <TT>text</TT>
-   * contains the same characters as this, -1 if the characters in 
-   * <TT>text</TT> are bitwise less than the characters in this, +1 if the
-   * characters in <TT>text</TT> are bitwise greater than the characters 
-   * in this.
+   * @return The result of bitwise character comparison: 0 if this
+   * contains the same characters as <code>srcText</code>, -1 if the characters in
+   * this are bitwise less than the characters in <code>srcText</code>, +1 if the
+   * characters in this are bitwise greater than the characters
+   * in <code>srcText</code>.
     * @stable ICU 2.0
     */
    inline int8_t compareBetween(int32_t start,
@@ -368,7 +395,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -387,7 +414,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -410,7 +437,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -437,7 +464,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -458,7 +485,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -481,7 +508,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -508,7 +535,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -716,19 +743,19 @@ public:
    /**
     * Determine if this starts with the characters in <TT>text</TT>
     * @param text The text to match.
-   * @return TRUE if this starts with the characters in <TT>text</TT>, 
+   * @return TRUE if this starts with the characters in <TT>text</TT>,
     * FALSE otherwise
     * @stable ICU 2.0
     */
    inline UBool startsWith(const UnicodeString& text) const;
  
    /**
-   * Determine if this starts with the characters in <TT>srcText</TT> 
-   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).   
+   * Determine if this starts with the characters in <TT>srcText</TT>
+   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * @param srcText The text to match.
     * @param srcStart the offset into <TT>srcText</TT> to start matching
     * @param srcLength the number of characters in <TT>srcText</TT> to match
-   * @return TRUE if this starts with the characters in <TT>text</TT>, 
+   * @return TRUE if this starts with the characters in <TT>srcText</TT>,
     * FALSE otherwise
     * @stable ICU 2.0
     */
@@ -740,7 +767,7 @@ public:
     * Determine if this starts with the characters in <TT>srcChars</TT>
     * @param srcChars The characters to match.
     * @param srcLength the number of characters in <TT>srcChars</TT>
-   * @return TRUE if this starts with the characters in <TT>srcChars</TT>, 
+   * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
     * FALSE otherwise
     * @stable ICU 2.0
     */
@@ -748,8 +775,8 @@ public:
              int32_t srcLength) const;
  
    /**
-   * Determine if this ends with the characters in <TT>srcChars</TT> 
-   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).   
+   * Determine if this ends with the characters in <TT>srcChars</TT>
+   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * @param srcChars The characters to match.
     * @param srcStart the offset into <TT>srcText</TT> to start matching
     * @param srcLength the number of characters in <TT>srcChars</TT> to match
@@ -763,19 +790,19 @@ public:
    /**
     * Determine if this ends with the characters in <TT>text</TT>
     * @param text The text to match.
-   * @return TRUE if this ends with the characters in <TT>text</TT>, 
+   * @return TRUE if this ends with the characters in <TT>text</TT>,
     * FALSE otherwise
     * @stable ICU 2.0
     */
    inline UBool endsWith(const UnicodeString& text) const;
  
    /**
-   * Determine if this ends with the characters in <TT>srcText</TT> 
-   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).   
+   * Determine if this ends with the characters in <TT>srcText</TT>
+   * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * @param srcText The text to match.
     * @param srcStart the offset into <TT>srcText</TT> to start matching
     * @param srcLength the number of characters in <TT>srcText</TT> to match
-   * @return TRUE if this ends with the characters in <TT>text</TT>, 
+   * @return TRUE if this ends with the characters in <TT>srcText</TT>,
     * FALSE otherwise
     * @stable ICU 2.0
     */
@@ -787,7 +814,7 @@ public:
     * Determine if this ends with the characters in <TT>srcChars</TT>
     * @param srcChars The characters to match.
     * @param srcLength the number of characters in <TT>srcChars</TT>
-   * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 
+   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
     * FALSE otherwise
     * @stable ICU 2.0
     */
@@ -795,12 +822,12 @@ public:
            int32_t srcLength) const;
  
    /**
-   * Determine if this ends with the characters in <TT>srcChars</TT> 
-   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).   
+   * Determine if this ends with the characters in <TT>srcChars</TT>
+   * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * @param srcChars The characters to match.
     * @param srcStart the offset into <TT>srcText</TT> to start matching
     * @param srcLength the number of characters in <TT>srcChars</TT> to match
-   * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 
+   * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
     * FALSE otherwise
     * @stable ICU 2.0
     */
@@ -815,7 +842,7 @@ public:
     * Locate in this the first occurrence of the characters in <TT>text</TT>,
     * using bitwise comparison.
     * @param text The text to search for.
-   * @return The offset into this of the start of <TT>text</TT>, 
+   * @return The offset into this of the start of <TT>text</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -826,7 +853,7 @@ public:
     * starting at offset <TT>start</TT>, using bitwise comparison.
     * @param text The text to search for.
     * @param start The offset at which searching will start.
-   * @return The offset into this of the start of <TT>text</TT>, 
+   * @return The offset into this of the start of <TT>text</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -835,12 +862,12 @@ public:
  
    /**
     * Locate in this the first occurrence in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters 
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
     * in <TT>text</TT>, using bitwise comparison.
     * @param text The text to search for.
     * @param start The offset at which searching will start.
     * @param length The number of characters to search
-   * @return The offset into this of the start of <TT>text</TT>, 
+   * @return The offset into this of the start of <TT>text</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -851,16 +878,16 @@ public:
    /**
     * Locate in this the first occurrence in the range
     * [<TT>start</TT>, <TT>start + length</TT>) of the characters
-   *  in <TT>srcText</TT> in the range 
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 
-   * using bitwise comparison.   
+   *  in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+   * using bitwise comparison.
     * @param srcText The text to search for.
     * @param srcStart the offset into <TT>srcText</TT> at which
     * to start matching
     * @param srcLength the number of characters in <TT>srcText</TT> to match
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
-   * @return The offset into this of the start of <TT>text</TT>, 
+   * @return The offset into this of the start of <TT>srcText</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -872,12 +899,12 @@ public:
  
    /**
     * Locate in this the first occurrence of the characters in
-   * <TT>srcChars</TT> 
-   * starting at offset <TT>start</TT>, using bitwise comparison. 
+   * <TT>srcChars</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
     * @param srcChars The text to search for.
     * @param srcLength the number of characters in <TT>srcChars</TT> to match
     * @param start the offset into this at which to start matching
-   * @return The offset into this of the start of <TT>text</TT>, 
+   * @return The offset into this of the start of <TT>srcChars</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -887,13 +914,13 @@ public:
  
    /**
     * Locate in this the first occurrence in the range
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters 
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
     * in <TT>srcChars</TT>, using bitwise comparison.
     * @param srcChars The text to search for.
     * @param srcLength the number of characters in <TT>srcChars</TT>
     * @param start The offset at which searching will start.
     * @param length The number of characters to search
-   * @return The offset into this of the start of <TT>srcChars</TT>, 
+   * @return The offset into this of the start of <TT>srcChars</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -901,20 +928,20 @@ public:
                int32_t srcLength,
                int32_t start,
                int32_t length) const;
- 
+
    /**
-   * Locate in this the first occurrence in the range 
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters 
-   * in <TT>srcChars</TT> in the range 
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 
-   * using bitwise comparison.   
+   * Locate in this the first occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+   * using bitwise comparison.
     * @param srcChars The text to search for.
-   * @param srcStart the offset into <TT>srcChars</TT> at which 
+   * @param srcStart the offset into <TT>srcChars</TT> at which
     * to start matching
     * @param srcLength the number of characters in <TT>srcChars</TT> to match
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
-   * @return The offset into this of the start of <TT>text</TT>, 
+   * @return The offset into this of the start of <TT>srcChars</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -934,7 +961,7 @@ public:
    inline int32_t indexOf(UChar c) const;
  
    /**
-   * Locate in this the first occurrence of the code point <TT>c</TT>, 
+   * Locate in this the first occurrence of the code point <TT>c</TT>,
     * using bitwise comparison.
     *
     * @param c The code point to search for.
@@ -968,8 +995,8 @@ public:
  
    /**
     * Locate in this the first occurrence of the BMP code point <code>c</code>
-   * in the range [<TT>start</TT>, <TT>start + length</TT>), 
-   * using bitwise comparison.   
+   * in the range [<TT>start</TT>, <TT>start + length</TT>),
+   * using bitwise comparison.
     * @param c The code unit to search for.
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
@@ -981,9 +1008,9 @@ public:
                int32_t length) const;
  
    /**
-   * Locate in this the first occurrence of the code point <TT>c</TT> 
-   * in the range [<TT>start</TT>, <TT>start + length</TT>), 
-   * using bitwise comparison.   
+   * Locate in this the first occurrence of the code point <TT>c</TT>
+   * in the range [<TT>start</TT>, <TT>start + length</TT>),
+   * using bitwise comparison.
     *
     * @param c The code point to search for.
     * @param start the offset into this at which to start matching
@@ -996,10 +1023,10 @@ public:
                int32_t length) const;
  
    /**
-   * Locate in this the last occurrence of the characters in <TT>text</TT>, 
+   * Locate in this the last occurrence of the characters in <TT>text</TT>,
     * using bitwise comparison.
     * @param text The text to search for.
-   * @return The offset into this of the start of <TT>text</TT>, 
+   * @return The offset into this of the start of <TT>text</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -1010,7 +1037,7 @@ public:
     * starting at offset <TT>start</TT>, using bitwise comparison.
     * @param text The text to search for.
     * @param start The offset at which searching will start.
-   * @return The offset into this of the start of <TT>text</TT>, 
+   * @return The offset into this of the start of <TT>text</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -1018,13 +1045,13 @@ public:
                int32_t start) const;
  
    /**
-   * Locate in this the last occurrence in the range 
+   * Locate in this the last occurrence in the range
     * [<TT>start</TT>, <TT>start + length</TT>) of the characters
     * in <TT>text</TT>, using bitwise comparison.
     * @param text The text to search for.
     * @param start The offset at which searching will start.
     * @param length The number of characters to search
-   * @return The offset into this of the start of <TT>text</TT>, 
+   * @return The offset into this of the start of <TT>text</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -1033,18 +1060,18 @@ public:
                int32_t length) const;
  
    /**
-   * Locate in this the last occurrence in the range 
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters 
-   * in <TT>srcText</TT> in the range 
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 
-   * using bitwise comparison.   
+   * Locate in this the last occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+   * using bitwise comparison.
     * @param srcText The text to search for.
-   * @param srcStart the offset into <TT>srcText</TT> at which 
+   * @param srcStart the offset into <TT>srcText</TT> at which
     * to start matching
     * @param srcLength the number of characters in <TT>srcText</TT> to match
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
-   * @return The offset into this of the start of <TT>text</TT>, 
+   * @return The offset into this of the start of <TT>srcText</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -1055,12 +1082,12 @@ public:
                int32_t length) const;
  
    /**
-   * Locate in this the last occurrence of the characters in <TT>srcChars</TT> 
-   * starting at offset <TT>start</TT>, using bitwise comparison. 
+   * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
+   * starting at offset <TT>start</TT>, using bitwise comparison.
     * @param srcChars The text to search for.
     * @param srcLength the number of characters in <TT>srcChars</TT> to match
     * @param start the offset into this at which to start matching
-   * @return The offset into this of the start of <TT>text</TT>, 
+   * @return The offset into this of the start of <TT>srcChars</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -1069,14 +1096,14 @@ public:
                int32_t start) const;
  
    /**
-   * Locate in this the last occurrence in the range 
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters 
+   * Locate in this the last occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
     * in <TT>srcChars</TT>, using bitwise comparison.
     * @param srcChars The text to search for.
     * @param srcLength the number of characters in <TT>srcChars</TT>
     * @param start The offset at which searching will start.
     * @param length The number of characters to search
-   * @return The offset into this of the start of <TT>srcChars</TT>, 
+   * @return The offset into this of the start of <TT>srcChars</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -1084,20 +1111,20 @@ public:
                int32_t srcLength,
                int32_t start,
                int32_t length) const;
- 
+
    /**
-   * Locate in this the last occurrence in the range 
-   * [<TT>start</TT>, <TT>start + length</TT>) of the characters 
-   * in <TT>srcChars</TT> in the range 
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 
-   * using bitwise comparison.   
+   * Locate in this the last occurrence in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+   * in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+   * using bitwise comparison.
     * @param srcChars The text to search for.
     * @param srcStart the offset into <TT>srcChars</TT> at which
     * to start matching
     * @param srcLength the number of characters in <TT>srcChars</TT> to match
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
-   * @return The offset into this of the start of <TT>text</TT>, 
+   * @return The offset into this of the start of <TT>srcChars</TT>,
     * or -1 if not found.
     * @stable ICU 2.0
     */
@@ -1117,7 +1144,7 @@ public:
    inline int32_t lastIndexOf(UChar c) const;
  
    /**
-   * Locate in this the last occurrence of the code point <TT>c</TT>, 
+   * Locate in this the last occurrence of the code point <TT>c</TT>,
     * using bitwise comparison.
     *
     * @param c The code point to search for.
@@ -1151,8 +1178,8 @@ public:
  
    /**
     * Locate in this the last occurrence of the BMP code point <code>c</code>
-   * in the range [<TT>start</TT>, <TT>start + length</TT>), 
-   * using bitwise comparison.   
+   * in the range [<TT>start</TT>, <TT>start + length</TT>),
+   * using bitwise comparison.
     * @param c The code unit to search for.
     * @param start the offset into this at which to start matching
     * @param length the number of characters in this to search
@@ -1164,9 +1191,9 @@ public:
                int32_t length) const;
  
    /**
-   * Locate in this the last occurrence of the code point <TT>c</TT> 
-   * in the range [<TT>start</TT>, <TT>start + length</TT>), 
-   * using bitwise comparison.   
+   * Locate in this the last occurrence of the code point <TT>c</TT>
+   * in the range [<TT>start</TT>, <TT>start + length</TT>),
+   * using bitwise comparison.
     *
     * @param c The code point to search for.
     * @param start the offset into this at which to start matching
@@ -1303,7 +1330,7 @@ public:
    /* Substring extraction */
  
    /**
-   * Copy the characters in the range 
+   * Copy the characters in the range
     * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
     * beginning at <tt>dstStart</tt>.
     * If the string aliases to <code>dst</code> itself as an external buffer,
@@ -1317,9 +1344,9 @@ public:
     * will be extracted
     * @stable ICU 2.0
     */
-  inline void extract(int32_t start, 
-           int32_t length, 
-           UChar *dst, 
+  inline void extract(int32_t start,
+           int32_t length,
+           UChar *dst,
             int32_t dstStart = 0) const;
  
    /**
@@ -1348,7 +1375,7 @@ public:
            UErrorCode &errorCode) const;
  
    /**
-   * Copy the characters in the range 
+   * Copy the characters in the range
     * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
     * <tt>target</tt>.
     * @param start offset of first character which will be copied
@@ -1362,23 +1389,23 @@ public:
             UnicodeString& target) const;
  
    /**
-   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 
+   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
     * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
     * @param start offset of first character which will be copied into the array
     * @param limit offset immediately following the last character to be copied
-   * @param dst array in which to copy characters.  The length of <tt>dst</tt> 
+   * @param dst array in which to copy characters.  The length of <tt>dst</tt>
     * must be at least (<tt>dstStart + (limit - start)</tt>).
     * @param dstStart the offset in <TT>dst</TT> where the first character
     * will be extracted
     * @stable ICU 2.0
     */
-  inline void extractBetween(int32_t start, 
-              int32_t limit, 
-              UChar *dst, 
+  inline void extractBetween(int32_t start,
+              int32_t limit,
+              UChar *dst,
                int32_t dstStart = 0) const;
  
    /**
-   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 
+   * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
     * into the UnicodeString <tt>target</tt>.  Replaceable API.
     * @param start offset of first character which will be copied
     * @param limit offset immediately following the last character to be copied
@@ -1392,14 +1419,48 @@ public:
  
    /**
     * Copy the characters in the range 
+   * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters.
+   * All characters must be invariant (see utypes.h).
+   * Use US_INV as the last, signature-distinguishing parameter.
+   *
+   * This function does not write any more than <code>targetCapacity</code>
+   * characters but returns the length of the entire output string
+   * so that one can allocate a larger buffer and call the function again
+   * if necessary.
+   * The output string is NUL-terminated if possible.
+   *
+   * @param start offset of first character which will be copied
+   * @param startLength the number of characters to extract
+   * @param target the target buffer for extraction, can be NULL
+   *               if targetCapacity is 0
+   * @param targetCapacity the length of the target buffer
+   * @param inv Signature-distinguishing parameter, use US_INV.
+   * @return the output string length, not including the terminating NUL
+   * @stable ICU 3.2
+   */
+  int32_t extract(int32_t start,
+           int32_t startLength,
+           char *target,
+           int32_t targetCapacity,
+           enum EInvariant inv) const;
+
+#if !UCONFIG_NO_CONVERSION
+
+  /**
+   * Copy the characters in the range
     * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
     * in a specified codepage.
     * The output string is NUL-terminated.
     *
+   * Recommendation: For invariant-character strings use
+   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
     * @param start offset of first character which will be copied
     * @param startLength the number of characters to extract
     * @param target the target buffer for extraction
-   * @param codepage the desired codepage for the characters.  0 has 
+   * @param codepage the desired codepage for the characters.  0 has
     * the special meaning of the default codepage
     * If <code>codepage</code> is an empty string (<code>""</code>),
     * then a simple conversion is performed on the codepage-invariant
@@ -1416,7 +1477,7 @@ public:
                   const char *codepage = 0) const;
  
    /**
-   * Copy the characters in the range 
+   * Copy the characters in the range
     * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
     * in a specified codepage.
     * This function does not write any more than <code>targetLength</code>
@@ -1425,11 +1486,16 @@ public:
     * if necessary.
     * The output string is NUL-terminated if possible.
     *
+   * Recommendation: For invariant-character strings use
+   * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
     * @param start offset of first character which will be copied
     * @param startLength the number of characters to extract
     * @param target the target buffer for extraction
     * @param targetLength the length of the target buffer
-   * @param codepage the desired codepage for the characters.  0 has 
+   * @param codepage the desired codepage for the characters.  0 has
     * the special meaning of the default codepage
     * If <code>codepage</code> is an empty string (<code>""</code>),
     * then a simple conversion is performed on the codepage-invariant
@@ -1466,12 +1532,16 @@ public:
                    UConverter *cnv,
                    UErrorCode &errorCode) const;
  
+#endif
+
    /* Length operations */
  
    /**
-   * Return the length of the UnicodeString object.  
-   * The length is the number of characters in the text.
+   * Return the length of the UnicodeString object.
+   * The length is the number of UChar code units in the UnicodeString.
+   * If you want the number of code points, please use countChar32().
     * @return the length of the UnicodeString object
+   * @see countChar32
     * @stable ICU 2.0
     */
    inline int32_t length(void) const;
@@ -1486,6 +1556,7 @@ public:
     * @param start the index of the first code unit to check
     * @param length the number of UChar code units to check
     * @return the number of code points in the specified code units
+   * @see length
     * @stable ICU 2.0
     */
    int32_t
@@ -1512,7 +1583,7 @@ public:
     *         than 'number'. Same as (u_countChar32(s, length)>number).
     * @see countChar32
     * @see u_strHasMoreChar32Than
-   * @draft ICU 2.4
+   * @stable ICU 2.4
     */
    UBool
    hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
@@ -1591,7 +1662,7 @@ public:
     *
     * @param src The text containing the characters to replace.
     * @return a reference to this
-   * @draft ICU 2.4
+   * @stable ICU 2.4
     */
    UnicodeString &fastCopyFrom(const UnicodeString &src);
  
@@ -1615,21 +1686,21 @@ public:
  
    /**
     * Set the text in the UnicodeString object to the characters
-   * in <TT>srcText</TT> in the range 
+   * in <TT>srcText</TT> in the range
     * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
     * <TT>srcText</TT> is not modified.
     * @param srcText the source for the new characters
     * @param srcStart the offset into <TT>srcText</TT> where new characters
     * will be obtained
     * @return a reference to this
-   * @draft ICU2.2
+   * @stable ICU 2.2
     */
-  inline UnicodeString& setTo(const UnicodeString& srcText, 
+  inline UnicodeString& setTo(const UnicodeString& srcText,
                 int32_t srcStart);
  
    /**
     * Set the text in the UnicodeString object to the characters
-   * in <TT>srcText</TT> in the range 
+   * in <TT>srcText</TT> in the range
     * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * <TT>srcText</TT> is not modified.
     * @param srcText the source for the new characters
@@ -1640,13 +1711,13 @@ public:
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& setTo(const UnicodeString& srcText, 
-               int32_t srcStart, 
+  inline UnicodeString& setTo(const UnicodeString& srcText,
+               int32_t srcStart,
                 int32_t srcLength);
  
    /**
-   * Set the text in the UnicodeString object to the characters in 
-   * <TT>srcText</TT>.  
+   * Set the text in the UnicodeString object to the characters in
+   * <TT>srcText</TT>.
     * <TT>srcText</TT> is not modified.
     * @param srcText the source for the new characters
     * @return a reference to this
@@ -1668,7 +1739,7 @@ public:
    /**
     * Set the characters in the UnicodeString object to the code unit
     * <TT>srcChar</TT>.
-   * @param srcChar the code unit which becomes the UnicodeString's character 
+   * @param srcChar the code unit which becomes the UnicodeString's character
     * content
     * @return a reference to this
     * @stable ICU 2.0
@@ -1678,7 +1749,7 @@ public:
    /**
     * Set the characters in the UnicodeString object to the code point
     * <TT>srcChar</TT>.
-   * @param srcChar the code point which becomes the UnicodeString's character 
+   * @param srcChar the code point which becomes the UnicodeString's character
     * content
     * @return a reference to this
     * @stable ICU 2.0
@@ -1781,7 +1852,7 @@ public:
     * @return A reference to this
     * @stable ICU 2.0
     */
-  UnicodeString& setCharAt(int32_t offset, 
+  UnicodeString& setCharAt(int32_t offset,
                 UChar ch);
  
  
@@ -1817,24 +1888,24 @@ public:
  
    /**
     * Append the characters
-   * in <TT>srcText</TT> in the range 
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the 
-   * UnicodeString object at offset <TT>start</TT>. <TT>srcText</TT> 
+   * in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
+   * UnicodeString object at offset <TT>start</TT>. <TT>srcText</TT>
     * is not modified.
     * @param srcText the source for the new characters
-   * @param srcStart the offset into <TT>srcText</TT> where new characters 
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
     * will be obtained
-   * @param srcLength the number of characters in <TT>srcText</TT> in 
+   * @param srcLength the number of characters in <TT>srcText</TT> in
     * the append string
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& append(const UnicodeString& srcText, 
-            int32_t srcStart, 
+  inline UnicodeString& append(const UnicodeString& srcText,
+            int32_t srcStart,
              int32_t srcLength);
  
    /**
-   * Append the characters in <TT>srcText</TT> to the UnicodeString object at 
+   * Append the characters in <TT>srcText</TT> to the UnicodeString object at
     * offset <TT>start</TT>. <TT>srcText</TT> is not modified.
     * @param srcText the source for the new characters
     * @return a reference to this
@@ -1843,24 +1914,24 @@ public:
    inline UnicodeString& append(const UnicodeString& srcText);
  
    /**
-   * Append the characters in <TT>srcChars</TT> in the range 
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString 
-   * object at offset 
+   * Append the characters in <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
+   * object at offset
     * <TT>start</TT>. <TT>srcChars</TT> is not modified.
     * @param srcChars the source for the new characters
-   * @param srcStart the offset into <TT>srcChars</TT> where new characters 
+   * @param srcStart the offset into <TT>srcChars</TT> where new characters
     * will be obtained
-   * @param srcLength the number of characters in <TT>srcChars</TT> in 
+   * @param srcLength the number of characters in <TT>srcChars</TT> in
     * the append string
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& append(const UChar *srcChars, 
-            int32_t srcStart, 
+  inline UnicodeString& append(const UChar *srcChars,
+            int32_t srcStart,
              int32_t srcLength);
  
    /**
-   * Append the characters in <TT>srcChars</TT> to the UnicodeString object 
+   * Append the characters in <TT>srcChars</TT> to the UnicodeString object
     * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
     * @param srcChars the source for the new characters
     * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
@@ -1890,54 +1961,54 @@ public:
    /* Insert operations */
  
    /**
-   * Insert the characters in <TT>srcText</TT> in the range 
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 
+   * Insert the characters in <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
     * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
     * @param start the offset where the insertion begins
-   * @param srcText the source for the new characters 
-   * @param srcStart the offset into <TT>srcText</TT> where new characters 
+   * @param srcText the source for the new characters
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
     * will be obtained
-   * @param srcLength the number of characters in <TT>srcText</TT> in 
+   * @param srcLength the number of characters in <TT>srcText</TT> in
     * the insert string
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& insert(int32_t start, 
-            const UnicodeString& srcText, 
-            int32_t srcStart, 
+  inline UnicodeString& insert(int32_t start,
+            const UnicodeString& srcText,
+            int32_t srcStart,
              int32_t srcLength);
  
    /**
     * Insert the characters in <TT>srcText</TT> into the UnicodeString object
     * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
     * @param start the offset where the insertion begins
-   * @param srcText the source for the new characters 
+   * @param srcText the source for the new characters
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& insert(int32_t start, 
+  inline UnicodeString& insert(int32_t start,
              const UnicodeString& srcText);
  
    /**
-   * Insert the characters in <TT>srcChars</TT> in the range 
+   * Insert the characters in <TT>srcChars</TT> in the range
     * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
     *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
     * @param start the offset at which the insertion begins
     * @param srcChars the source for the new characters
-   * @param srcStart the offset into <TT>srcChars</TT> where new characters 
+   * @param srcStart the offset into <TT>srcChars</TT> where new characters
     * will be obtained
-   * @param srcLength the number of characters in <TT>srcChars</TT> 
+   * @param srcLength the number of characters in <TT>srcChars</TT>
     * in the insert string
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& insert(int32_t start, 
-            const UChar *srcChars, 
-            int32_t srcStart, 
+  inline UnicodeString& insert(int32_t start,
+            const UChar *srcChars,
+            int32_t srcStart,
              int32_t srcLength);
  
    /**
-   * Insert the characters in <TT>srcChars</TT> into the UnicodeString object 
+   * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
     * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
     * @param start the offset where the insertion begins
     * @param srcChars the source for the new characters
@@ -1945,61 +2016,61 @@ public:
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& insert(int32_t start, 
+  inline UnicodeString& insert(int32_t start,
              const UChar *srcChars,
              int32_t srcLength);
  
    /**
-   * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at 
+   * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
     * offset <TT>start</TT>.
     * @param start the offset at which the insertion occurs
     * @param srcChar the code unit to insert
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& insert(int32_t start, 
+  inline UnicodeString& insert(int32_t start,
              UChar srcChar);
  
    /**
-   * Insert the code point <TT>srcChar</TT> into the UnicodeString object at 
+   * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
     * offset <TT>start</TT>.
     * @param start the offset at which the insertion occurs
     * @param srcChar the code point to insert
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& insert(int32_t start, 
+  inline UnicodeString& insert(int32_t start,
              UChar32 srcChar);
  
  
    /* Replace operations */
  
    /**
-   * Replace the characters in the range 
-   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 
-   * <TT>srcText</TT> in the range 
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+   * <TT>srcText</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
     * <TT>srcText</TT> is not modified.
     * @param start the offset at which the replace operation begins
-   * @param length the number of characters to replace. The character at 
+   * @param length the number of characters to replace. The character at
     * <TT>start + length</TT> is not modified.
     * @param srcText the source for the new characters
-   * @param srcStart the offset into <TT>srcText</TT> where new characters 
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
     * will be obtained
-   * @param srcLength the number of characters in <TT>srcText</TT> in 
+   * @param srcLength the number of characters in <TT>srcText</TT> in
     * the replace string
     * @return a reference to this
     * @stable ICU 2.0
     */
-  UnicodeString& replace(int32_t start, 
-             int32_t length, 
-             const UnicodeString& srcText, 
-             int32_t srcStart, 
+  UnicodeString& replace(int32_t start,
+             int32_t length,
+             const UnicodeString& srcText,
+             int32_t srcStart,
               int32_t srcLength);
  
    /**
-   * Replace the characters in the range 
-   * [<TT>start</TT>, <TT>start + length</TT>) 
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>)
     * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
     *  not modified.
     * @param start the offset at which the replace operation begins
@@ -2009,15 +2080,15 @@ public:
     * @return a reference to this
     * @stable ICU 2.0
     */
-  UnicodeString& replace(int32_t start, 
-             int32_t length, 
+  UnicodeString& replace(int32_t start,
+             int32_t length,
               const UnicodeString& srcText);
  
    /**
-   * Replace the characters in the range 
-   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 
-   * <TT>srcChars</TT> in the range 
-   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT> 
+   * Replace the characters in the range
+   * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+   * <TT>srcChars</TT> in the range
+   * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
     * is not modified.
     * @param start the offset at which the replace operation begins
     * @param length the number of characters to replace.  The character at
@@ -2025,19 +2096,19 @@ public:
     * @param srcChars the source for the new characters
     * @param srcStart the offset into <TT>srcChars</TT> where new characters
     * will be obtained
-   * @param srcLength the number of characters in <TT>srcChars</TT> 
+   * @param srcLength the number of characters in <TT>srcChars</TT>
     * in the replace string
     * @return a reference to this
     * @stable ICU 2.0
     */
-  UnicodeString& replace(int32_t start, 
-             int32_t length, 
-             const UChar *srcChars, 
-             int32_t srcStart, 
+  UnicodeString& replace(int32_t start,
+             int32_t length,
+             const UChar *srcChars,
+             int32_t srcStart,
               int32_t srcLength);
  
    /**
-   * Replace the characters in the range 
+   * Replace the characters in the range
     * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
     * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
     * @param start the offset at which the replace operation begins
@@ -2048,13 +2119,13 @@ public:
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& replace(int32_t start, 
-             int32_t length, 
+  inline UnicodeString& replace(int32_t start,
+             int32_t length,
               const UChar *srcChars,
               int32_t srcLength);
  
    /**
-   * Replace the characters in the range 
+   * Replace the characters in the range
     * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
     * <TT>srcChar</TT>.
     * @param start the offset at which the replace operation begins
@@ -2064,12 +2135,12 @@ public:
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& replace(int32_t start, 
-             int32_t length, 
+  inline UnicodeString& replace(int32_t start,
+             int32_t length,
               UChar srcChar);
  
    /**
-   * Replace the characters in the range 
+   * Replace the characters in the range
     * [<TT>start</TT>, <TT>start + length</TT>) with the code point
     * <TT>srcChar</TT>.
     * @param start the offset at which the replace operation begins
@@ -2079,12 +2150,12 @@ public:
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& replace(int32_t start, 
-             int32_t length, 
+  inline UnicodeString& replace(int32_t start,
+             int32_t length,
               UChar32 srcChar);
  
    /**
-   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 
+   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
     * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
     * @param start the offset at which the replace operation begins
     * @param limit the offset immediately following the replace range
@@ -2092,28 +2163,28 @@ public:
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& replaceBetween(int32_t start, 
-                int32_t limit, 
+  inline UnicodeString& replaceBetween(int32_t start,
+                int32_t limit,
                  const UnicodeString& srcText);
  
    /**
-   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 
-   * with the characters in <TT>srcText</TT> in the range 
+   * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
+   * with the characters in <TT>srcText</TT> in the range
     * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
     * @param start the offset at which the replace operation begins
     * @param limit the offset immediately following the replace range
     * @param srcText the source for the new characters
-   * @param srcStart the offset into <TT>srcChars</TT> where new characters 
+   * @param srcStart the offset into <TT>srcText</TT> where new characters
     * will be obtained
-   * @param srcLimit the offset immediately following the range to copy 
+   * @param srcLimit the offset immediately following the range to copy
     * in <TT>srcText</TT>
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& replaceBetween(int32_t start, 
-                int32_t limit, 
-                const UnicodeString& srcText, 
-                int32_t srcStart, 
+  inline UnicodeString& replaceBetween(int32_t start,
+                int32_t limit,
+                const UnicodeString& srcText,
+                int32_t srcStart,
                  int32_t srcLimit);
  
    /**
@@ -2133,15 +2204,15 @@ public:
    /**
     * Replaceable API
     * @return TRUE if it has MetaData
-   * @draft ICU 2.4
+   * @stable ICU 2.4
     */
    virtual UBool hasMetaData() const;
-  
+
    /**
     * Copy a substring of this object, retaining attribute (out-of-band)
     * information.  This method is used to duplicate or reorder substrings.
     * The destination index must not overlap the source range.
-   * 
+   *
     * @param start the beginning index, inclusive; <code>0 <= start <=
     * limit</code>.
     * @param limit the ending index, exclusive; <code>start <= limit <=
@@ -2157,7 +2228,7 @@ public:
    /* Search and replace operations */
  
    /**
-   * Replace all occurrences of characters in oldText with the characters 
+   * Replace all occurrences of characters in oldText with the characters
     * in newText
     * @param oldText the text containing the search text
     * @param newText the text containing the replacement text
@@ -2168,7 +2239,7 @@ public:
                  const UnicodeString& newText);
  
    /**
-   * Replace all occurrences of characters in oldText with characters 
+   * Replace all occurrences of characters in oldText with characters
     * in newText
     * in the range [<TT>start</TT>, <TT>start + length</TT>).
     * @param start the start of the range in which replace will performed
@@ -2184,10 +2255,10 @@ public:
                  const UnicodeString& newText);
  
    /**
-   * Replace all occurrences of characters in oldText in the range 
-   * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters 
-   * in newText in the range 
-   * [<TT>newStart</TT>, <TT>newStart + newLength</TT>) 
+   * Replace all occurrences of characters in oldText in the range
+   * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
+   * in newText in the range
+   * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
     * in the range [<TT>start</TT>, <TT>start + length</TT>).
     * @param start the start of the range in which replace will performed
     * @param length the length of the range in which replace will be performed
@@ -2220,18 +2291,18 @@ public:
    inline UnicodeString& remove(void);
  
    /**
-   * Remove the characters in the range 
+   * Remove the characters in the range
     * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
     * @param start the offset of the first character to remove
     * @param length the number of characters to remove
     * @return a reference to this
     * @stable ICU 2.0
     */
-  inline UnicodeString& remove(int32_t start, 
+  inline UnicodeString& remove(int32_t start,
                                 int32_t length = (int32_t)INT32_MAX);
  
    /**
-   * Remove the characters in the range 
+   * Remove the characters in the range
     * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
     * @param start the offset of the first character to remove
     * @param limit the offset immediately following the range to remove
@@ -2245,12 +2316,12 @@ public:
    /* Length operations */
  
    /**
-   * Pad the start of this UnicodeString with the character <TT>padChar</TT>.  
-   * If the length of this UnicodeString is less than targetLength, 
+   * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
+   * If the length of this UnicodeString is less than targetLength,
     * length() - targetLength copies of padChar will be added to the
     * beginning of this UnicodeString.
     * @param targetLength the desired length of the string
-   * @param padChar the character to use for padding. Defaults to 
+   * @param padChar the character to use for padding. Defaults to
     * space (U+0020)
     * @return TRUE if the text was padded, FALSE otherwise.
     * @stable ICU 2.0
@@ -2259,12 +2330,12 @@ public:
                      UChar padChar = 0x0020);
  
    /**
-   * Pad the end of this UnicodeString with the character <TT>padChar</TT>.  
-   * If the length of this UnicodeString is less than targetLength, 
+   * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
+   * If the length of this UnicodeString is less than targetLength,
     * length() - targetLength copies of padChar will be added to the
     * end of this UnicodeString.
     * @param targetLength the desired length of the string
-   * @param padChar the character to use for padding. Defaults to 
+   * @param padChar the character to use for padding. Defaults to
     * space (U+0020)
     * @return TRUE if the text was padded, FALSE otherwise.
     * @stable ICU 2.0
@@ -2359,13 +2430,14 @@ public:
     * The standard titlecase iterator for the root locale implements the
     * algorithm of Unicode TR 21.
     *
-   * This function uses only the first() and next() methods of the
+   * This function uses only the setText(), first() and next() methods of the
     * provided break iterator.
     *
     * @param titleIter A break iterator to find the first characters of words
     *                  that are to be titlecased.
     *                  If none is provided (0), then a standard titlecase
     *                  break iterator is opened.
+   *                  Otherwise the provided iterator is set to the string's text.
     * @return A reference to this.
     * @stable ICU 2.1
     */
@@ -2386,19 +2458,53 @@ public:
     * The standard titlecase iterator for the root locale implements the
     * algorithm of Unicode TR 21.
     *
-   * This function uses only the first() and next() methods of the
+   * This function uses only the setText(), first() and next() methods of the
     * provided break iterator.
     *
     * @param titleIter A break iterator to find the first characters of words
     *                  that are to be titlecased.
     *                  If none is provided (0), then a standard titlecase
     *                  break iterator is opened.
+   *                  Otherwise the provided iterator is set to the string's text.
     * @param locale    The locale to consider.
     * @return A reference to this.
     * @stable ICU 2.1
     */
    UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
  
+  /**
+   * Titlecase this string, with options.
+   *
+   * Casing is locale-dependent and context-sensitive.
+   * Titlecasing uses a break iterator to find the first characters of words
+   * that are to be titlecased. It titlecases those characters and lowercases
+   * all others. (This can be modified with options.)
+   *
+   * The titlecase break iterator can be provided to customize for arbitrary
+   * styles, using rules and dictionaries beyond the standard iterators.
+   * It may be more efficient to always provide an iterator to avoid
+   * opening and closing one for each string.
+   * The standard titlecase iterator for the root locale implements the
+   * algorithm of Unicode TR 21.
+   *
+   * This function uses only the setText(), first() and next() methods of the
+   * provided break iterator.
+   *
+   * @param titleIter A break iterator to find the first characters of words
+   *                  that are to be titlecased.
+   *                  If none is provided (0), then a standard titlecase
+   *                  break iterator is opened.
+   *                  Otherwise the provided iterator is set to the string's text.
+   * @param locale    The locale to consider.
+   * @param options Options bit set, see ucasemap_open().
+   * @return A reference to this.
+   * @see U_TITLECASE_NO_LOWERCASE
+   * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+   * @see ucasemap_open
+   * @stable ICU 4.0
+   */
+  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
+
  #endif
  
    /**
@@ -2548,7 +2654,7 @@ public:
     *
     * @see getBuffer(int32_t minCapacity)
     * @see getBuffer()
-   * @draft ICU 2.2
+   * @stable ICU 2.2
     */
    inline const UChar *getTerminatedBuffer();
  
@@ -2556,7 +2662,7 @@ public:
    // Constructors
    //========================================
  
-  /** Construct an empty UnicodeString.  
+  /** Construct an empty UnicodeString.
     * @stable ICU 2.0
     */
    UnicodeString();
@@ -2649,15 +2755,23 @@ public:
     */
    UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
  
+#if !UCONFIG_NO_CONVERSION
+
    /**
     * char* constructor.
     * @param codepageData an array of bytes, null-terminated
     * @param codepage the encoding of <TT>codepageData</TT>.  The special
-   * value 0 for <TT>codepage</TT> indicates that the text is in the 
+   * value 0 for <TT>codepage</TT> indicates that the text is in the
     * platform's default codepage.
+   *
     * If <code>codepage</code> is an empty string (<code>""</code>),
     * then a simple conversion is performed on the codepage-invariant
     * subset ("invariant characters") of the platform encoding. See utypes.h.
+   * Recommendation: For invariant-character strings use the constructor
+   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
     * @stable ICU 2.0
     */
    UnicodeString(const char *codepageData,
@@ -2668,11 +2782,16 @@ public:
     * @param codepageData an array of bytes.
     * @param dataLength The number of bytes in <TT>codepageData</TT>.
     * @param codepage the encoding of <TT>codepageData</TT>.  The special
-   * value 0 for <TT>codepage</TT> indicates that the text is in the 
+   * value 0 for <TT>codepage</TT> indicates that the text is in the
     * platform's default codepage.
     * If <code>codepage</code> is an empty string (<code>""</code>),
     * then a simple conversion is performed on the codepage-invariant
     * subset ("invariant characters") of the platform encoding. See utypes.h.
+   * Recommendation: For invariant-character strings use the constructor
+   * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+   * because it avoids object code dependencies of UnicodeString on
+   * the conversion code.
+   *
     * @stable ICU 2.0
     */
    UnicodeString(const char *codepageData,
@@ -2705,6 +2824,34 @@ public:
          UConverter *cnv,
          UErrorCode &errorCode);
  
+#endif
+
+  /**
+   * Constructs a Unicode string from an invariant-character char * string.
+   * About invariant characters see utypes.h.
+   * This constructor has no runtime dependency on conversion code and is
+   * therefore recommended over ones taking a charset name string
+   * (where the empty string "" indicates invariant-character conversion).
+   *
+   * Use the macro US_INV as the third, signature-distinguishing parameter.
+   *
+   * For example:
+   * \code
+   * void fn(const char *s) {
+   *   UnicodeString ustr(s, -1, US_INV);
+   *   // use ustr ...
+   * }
+   * \endcode
+   *
+   * @param src String using only invariant characters.
+   * @param length Length of src, or -1 if NUL-terminated.
+   * @param inv Signature-distinguishing parameter, use US_INV.
+   *
+   * @see US_INV
+   * @stable ICU 3.2
+   */
+  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
+
  
    /**
     * Copy constructor.
@@ -2717,7 +2864,7 @@ public:
     * 'Substring' constructor from tail of source string.
     * @param src The UnicodeString object to copy.
     * @param srcStart The offset into <tt>src</tt> at which to start copying.
-   * @draft ICU2.2
+   * @stable ICU 2.2
     */
    UnicodeString(const UnicodeString& src, int32_t srcStart);
  
@@ -2726,7 +2873,7 @@ public:
     * @param src The UnicodeString object to copy.
     * @param srcStart The offset into <tt>src</tt> at which to start copying.
     * @param srcLength The number of characters from <tt>src</tt> to copy.
-   * @draft ICU2.2
+   * @stable ICU 2.2
     */
    UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
  
@@ -2744,14 +2891,14 @@ public:
     *
     * @see Replaceable::clone
     * @see getDynamicClassID
-   * @draft ICU 2.6
+   * @stable ICU 2.6
     */
    virtual Replaceable *clone() const;
  
-  /** Destructor. 
+  /** Destructor.
     * @stable ICU 2.0
     */
-  ~UnicodeString();
+  virtual ~UnicodeString();
  
  
    /* Miscellaneous operations */
@@ -2760,23 +2907,23 @@ public:
     * Unescape a string of characters and return a string containing
     * the result.  The following escape sequences are recognized:
     *
-   * \uhhhh       4 hex digits; h in [0-9A-Fa-f]
-   * \Uhhhhhhhh   8 hex digits
-   * \xhh         1-2 hex digits
-   * \ooo         1-3 octal digits; o in [0-7]
-   * \cX          control-X; X is masked with 0x1F
+   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
+   * \\Uhhhhhhhh   8 hex digits
+   * \\xhh         1-2 hex digits
+   * \\ooo         1-3 octal digits; o in [0-7]
+   * \\cX          control-X; X is masked with 0x1F
     *
     * as well as the standard ANSI C escapes:
     *
-   * \a => U+0007, \b => U+0008, \t => U+0009, \n => U+000A,
-   * \v => U+000B, \f => U+000C, \r => U+000D, \e => U+001B,
-   * \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C
+   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+   * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
     *
     * Anything else following a backslash is generically escaped.  For
-   * example, "[a\-z]" returns "[a-z]".
+   * example, "[a\\-z]" returns "[a-z]".
     *
     * If an escape sequence is ill-formed, this method returns an empty
-   * string.  An example of an ill-formed sequence is "\u" followed by
+   * string.  An example of an ill-formed sequence is "\\u" followed by
     * fewer than 4 hex digits.
     *
     * This function is similar to u_unescape() but not identical to it.
@@ -2814,18 +2961,18 @@ public:
    UChar32 unescapeAt(int32_t &offset) const;
  
    /**
-   * ICU "poor man's RTTI", returns a UClassID for the actual class.
+   * ICU "poor man's RTTI", returns a UClassID for this class.
     *
-   * @draft ICU 2.2
+   * @stable ICU 2.2
     */
-  virtual inline UClassID getDynamicClassID() const;
+  static UClassID U_EXPORT2 getStaticClassID();
  
    /**
-   * ICU "poor man's RTTI", returns a UClassID for this class.
+   * ICU "poor man's RTTI", returns a UClassID for the actual class.
     *
-   * @draft ICU 2.2
+   * @stable ICU 2.2
     */
-  static inline UClassID getStaticClassID();
+  virtual UClassID getDynamicClassID() const;
  
    //========================================
    // Implementation methods
@@ -2834,21 +2981,21 @@ public:
  protected:
    /**
     * Implement Replaceable::getLength() (see jitterbug 1027).
-   * @draft ICU 2.4
+   * @stable ICU 2.4
     */
    virtual int32_t getLength() const;
  
    /**
     * The change in Replaceable to use virtual getCharAt() allows
     * UnicodeString::charAt() to be inline again (see jitterbug 709).
-   * @draft ICU 2.4
+   * @stable ICU 2.4
     */
    virtual UChar getCharAt(int32_t offset) const;
  
    /**
     * The change in Replaceable to use virtual getChar32At() allows
     * UnicodeString::char32At() to be inline again (see jitterbug 709).
-   * @draft ICU 2.4
+   * @stable ICU 2.4
     */
    virtual UChar32 getChar32At(int32_t offset) const;
  
@@ -2912,9 +3059,9 @@ private:
                              int32_t start,
                              int32_t length) const;
  
-  void doExtract(int32_t start, 
-         int32_t length, 
-         UChar *dst, 
+  void doExtract(int32_t start,
+         int32_t length,
+         UChar *dst,
           int32_t dstStart) const;
  
    inline void doExtract(int32_t start,
@@ -2923,16 +3070,16 @@ private:
  
    inline UChar doCharAt(int32_t offset)  const;
  
-  UnicodeString& doReplace(int32_t start, 
-               int32_t length, 
-               const UnicodeString& srcText, 
-               int32_t srcStart, 
+  UnicodeString& doReplace(int32_t start,
+               int32_t length,
+               const UnicodeString& srcText,
+               int32_t srcStart,
                 int32_t srcLength);
  
-  UnicodeString& doReplace(int32_t start, 
-               int32_t length, 
-               const UChar *srcChars, 
-               int32_t srcStart, 
+  UnicodeString& doReplace(int32_t start,
+               int32_t length,
+               const UChar *srcChars,
+               int32_t srcStart,
                 int32_t srcLength);
  
    UnicodeString& doReverse(int32_t start,
@@ -2942,9 +3089,23 @@ private:
    int32_t doHashCode(void) const;
  
    // get pointer to start of array
+  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
    inline UChar* getArrayStart(void);
    inline const UChar* getArrayStart(void) const;
  
+  // A UnicodeString object (not necessarily its current buffer)
+  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
+  inline UBool isWritable() const;
+
+  // Is the current buffer writable?
+  inline UBool isBufferWritable() const;
+
+  // None of the following does releaseArray().
+  inline void setLength(int32_t len);        // sets only fShortLength and fLength
+  inline void setToEmpty();                  // sets fFlags=kShortString
+  inline void setToStackBuffer(int32_t len); // sets fFlags=kShortString
+  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
+
    // allocate the array; result may be fStackBuffer
    // sets refCount to 1 if appropriate
    // sets fArray, fCapacity, and fFlags
@@ -2965,6 +3126,8 @@ private:
    inline void pinIndices(int32_t& start,
                           int32_t& length) const;
  
+#if !UCONFIG_NO_CONVERSION
+
    /* Internal extract() using UConverter. */
    int32_t doExtract(int32_t start, int32_t length,
                      char *dest, int32_t destCapacity,
@@ -2994,6 +3157,9 @@ private:
                     int32_t dataLength,
                     UConverter *converter,
                     UErrorCode &status);
+
+#endif
+
    /*
     * This function is called when write access to the array
     * is necessary.
@@ -3014,7 +3180,7 @@ private:
    // common function for case mappings
    UnicodeString &
    caseMap(BreakIterator *titleIter,
-          const Locale& locale,
+          const char *locale,
            uint32_t options,
            int32_t toWhichCase);
  
@@ -3025,7 +3191,10 @@ private:
  
    // constants
    enum {
-    US_STACKBUF_SIZE=7, // Size of stack buffer for small strings
+    // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer):
+    // 32-bit pointers: 4+1+1+13*2 = 32 bytes
+    // 64-bit pointers: 8+1+1+15*2 = 40 bytes
+    US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings
      kInvalidUChar=0xffff, // invalid UChar index
      kGrowSize=128, // grow size for this buffer
      kInvalidHashCode=0, // invalid hash code
@@ -3046,9 +3215,11 @@ private:
      kWritableAlias=0
    };
  
-  friend class StringCharacterIterator;
    friend class StringThreadTest;
  
+  union StackBufferOrFields;        // forward declaration necessary before friend declaration
+  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
+
    /*
     * The following are all the class fields that are stored
     * in each UnicodeString object.
@@ -3061,42 +3232,31 @@ private:
     * on 64-bit machines (8-byte pointers), it should be 40 bytes.
     */
    // (implicit) *vtable;
-  int32_t   fLength;        // number of characters in fArray
-  int32_t   fCapacity;      // sizeof fArray
-  UChar     *fArray;        // the Unicode data
-  uint16_t  fFlags;         // bit flags: see constants above
-  UChar     fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
-
-  /**
-   * The address of this static class variable serves as this class's ID
-   * for ICU "poor man's RTTI".
-   */
-  static const char fgClassID;
+  int8_t    fShortLength;   // 0..127: length  <0: real length is in fUnion.fFields.fLength
+  uint8_t   fFlags;         // bit flags: see constants above
+  union StackBufferOrFields {
+    // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
+    // else fFields is used
+    UChar     fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings
+    struct {
+      uint16_t  fPadding;   // align the following field at 8B (32b pointers) or 12B (64b)
+      int32_t   fLength;    // number of characters in fArray if >127; else undefined
+      UChar     *fArray;    // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))
+      int32_t   fCapacity;  // sizeof fArray
+    } fFields;
+  } fUnion;
  };
  
-U_NAMESPACE_END
-
-//========================================
-// Array copying
-//========================================
-/**
- * Copy an array of UnicodeString OBJECTS (not pointers).
- * @internal
- */
-inline void 
-uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t count)
-{ while(count-- > 0) *dst++ = *src++; }
-
  /**
- * Copy an array of UnicodeString OBJECTS (not pointers).
- * @internal
+ * Create a new UnicodeString with the concatenation of two others.
+ *
+ * @param s1 The first string to be copied to the new one.
+ * @param s2 The second string to be copied to the new one, after s1.
+ * @return UnicodeString(s1).append(s2)
+ * @stable ICU 2.8
   */
-inline void 
-uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, int32_t srcStart, 
-        U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t dstStart, int32_t count)
-{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); }
-
-U_NAMESPACE_BEGIN
+U_COMMON_API UnicodeString U_EXPORT2
+operator+ (const UnicodeString &s1, const UnicodeString &s2);
  
  //========================================
  // Inline members
@@ -3112,8 +3272,8 @@ UnicodeString::pinIndex(int32_t& start) const
    // pin index
    if(start < 0) {
      start = 0;
-  } else if(start > fLength) {
-    start = fLength;
+  } else if(start > length()) {
+    start = length();
    }
  }
  
@@ -3122,59 +3282,66 @@ UnicodeString::pinIndices(int32_t& start,
                            int32_t& _length) const
  {
    // pin indices
+  int32_t len = length();
    if(start < 0) {
      start = 0;
-  } else if(start > fLength) {
-    start = fLength;
+  } else if(start > len) {
+    start = len;
    }
    if(_length < 0) {
      _length = 0;
-  } else if(_length > (fLength - start)) {
-    _length = (fLength - start);
+  } else if(_length > (len - start)) {
+    _length = (len - start);
    }
  }
  
-inline UChar* 
+inline UChar*
  UnicodeString::getArrayStart()
-{ return fArray; }
+{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
  
-inline const UChar* 
+inline const UChar*
  UnicodeString::getArrayStart() const
-{ return fArray; }
+{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
  
  //========================================
  // Read-only implementation methods
  //========================================
-inline UClassID
-UnicodeString::getStaticClassID()
-{ return (UClassID)&fgClassID; }
-
-inline UClassID
-UnicodeString::getDynamicClassID() const
-{ return UnicodeString::getStaticClassID(); }
-
-inline int32_t  
+inline int32_t
  UnicodeString::length() const
-{ return fLength; }
+{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
  
-inline int32_t 
+inline int32_t
  UnicodeString::getCapacity() const
-{ return fCapacity; }
+{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
  
-inline int32_t 
+inline int32_t
  UnicodeString::hashCode() const
  { return doHashCode(); }
  
-inline UBool 
+inline UBool
  UnicodeString::isBogus() const
  { return (UBool)(fFlags & kIsBogus); }
  
+inline UBool
+UnicodeString::isWritable() const
+{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
+
+inline UBool
+UnicodeString::isBufferWritable() const
+{
+  return (UBool)(
+      !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
+      (!(fFlags&kRefCounted) || refCount()==1));
+}
+
  inline const UChar *
  UnicodeString::getBuffer() const {
-  if(!(fFlags&(kIsBogus|kOpenGetBuffer))) {
-    return fArray;
-  } else {
+  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
      return 0;
+  } else if(fFlags&kUsingStackBuffer) {
+    return fUnion.fStackBuffer;
+  } else {
+    return fUnion.fFields.fArray;
    }
  }
  
@@ -3183,7 +3350,7 @@ UnicodeString::getBuffer() const {
  //========================================
  inline int8_t
  UnicodeString::doCompare(int32_t start,
-              int32_t length,
+              int32_t thisLength,
                const UnicodeString& srcText,
                int32_t srcStart,
                int32_t srcLength) const
@@ -3192,7 +3359,7 @@ UnicodeString::doCompare(int32_t start,
      return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
    } else {
      srcText.pinIndices(srcStart, srcLength);
-    return doCompare(start, length, srcText.fArray, srcStart, srcLength);
+    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
    }
  }
  
@@ -3202,10 +3369,11 @@ UnicodeString::operator== (const UnicodeString& text) const
    if(isBogus()) {
      return text.isBogus();
    } else {
+    int32_t len = length(), textLength = text.length();
      return
        !text.isBogus() &&
-      fLength == text.fLength &&
-      doCompare(0, fLength, text, 0, text.fLength) == 0;
+      len == textLength &&
+      doCompare(0, len, text, 0, textLength) == 0;
    }
  }
  
@@ -3215,36 +3383,36 @@ UnicodeString::operator!= (const UnicodeString& text) const
  
  inline UBool
  UnicodeString::operator> (const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength) == 1; }
+{ return doCompare(0, length(), text, 0, text.length()) == 1; }
  
  inline UBool
  UnicodeString::operator< (const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength) == -1; }
+{ return doCompare(0, length(), text, 0, text.length()) == -1; }
  
  inline UBool
  UnicodeString::operator>= (const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength) != -1; }
+{ return doCompare(0, length(), text, 0, text.length()) != -1; }
  
  inline UBool
  UnicodeString::operator<= (const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength) != 1; }
+{ return doCompare(0, length(), text, 0, text.length()) != 1; }
  
-inline int8_t 
+inline int8_t
  UnicodeString::compare(const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength); }
+{ return doCompare(0, length(), text, 0, text.length()); }
  
-inline int8_t 
+inline int8_t
  UnicodeString::compare(int32_t start,
                 int32_t _length,
                 const UnicodeString& srcText) const
-{ return doCompare(start, _length, srcText, 0, srcText.fLength); }
+{ return doCompare(start, _length, srcText, 0, srcText.length()); }
  
-inline int8_t 
+inline int8_t
  UnicodeString::compare(const UChar *srcChars,
                 int32_t srcLength) const
-{ return doCompare(0, fLength, srcChars, 0, srcLength); }
+{ return doCompare(0, length(), srcChars, 0, srcLength); }
  
-inline int8_t 
+inline int8_t
  UnicodeString::compare(int32_t start,
                 int32_t _length,
                 const UnicodeString& srcText,
@@ -3258,7 +3426,7 @@ UnicodeString::compare(int32_t start,
                 const UChar *srcChars) const
  { return doCompare(start, _length, srcChars, 0, _length); }
  
-inline int8_t 
+inline int8_t
  UnicodeString::compare(int32_t start,
                 int32_t _length,
                 const UChar *srcChars,
@@ -3272,12 +3440,12 @@ UnicodeString::compareBetween(int32_t start,
                    const UnicodeString& srcText,
                    int32_t srcStart,
                    int32_t srcLimit) const
-{ return doCompare(start, limit - start, 
+{ return doCompare(start, limit - start,
             srcText, srcStart, srcLimit - srcStart); }
  
  inline int8_t
  UnicodeString::doCompareCodePointOrder(int32_t start,
-                                       int32_t length,
+                                       int32_t thisLength,
                                         const UnicodeString& srcText,
                                         int32_t srcStart,
                                         int32_t srcLength) const
@@ -3286,26 +3454,26 @@ UnicodeString::doCompareCodePointOrder(int32_t start,
      return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
    } else {
      srcText.pinIndices(srcStart, srcLength);
-    return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength);
+    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
    }
  }
  
-inline int8_t 
+inline int8_t
  UnicodeString::compareCodePointOrder(const UnicodeString& text) const
-{ return doCompareCodePointOrder(0, fLength, text, 0, text.fLength); }
+{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
  
-inline int8_t 
+inline int8_t
  UnicodeString::compareCodePointOrder(int32_t start,
                                       int32_t _length,
                                       const UnicodeString& srcText) const
-{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.fLength); }
+{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
  
-inline int8_t 
+inline int8_t
  UnicodeString::compareCodePointOrder(const UChar *srcChars,
                                       int32_t srcLength) const
-{ return doCompareCodePointOrder(0, fLength, srcChars, 0, srcLength); }
+{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
  
-inline int8_t 
+inline int8_t
  UnicodeString::compareCodePointOrder(int32_t start,
                                       int32_t _length,
                                       const UnicodeString& srcText,
@@ -3319,7 +3487,7 @@ UnicodeString::compareCodePointOrder(int32_t start,
                                       const UChar *srcChars) const
  { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
  
-inline int8_t 
+inline int8_t
  UnicodeString::compareCodePointOrder(int32_t start,
                                       int32_t _length,
                                       const UChar *srcChars,
@@ -3333,12 +3501,12 @@ UnicodeString::compareCodePointOrderBetween(int32_t start,
                                              const UnicodeString& srcText,
                                              int32_t srcStart,
                                              int32_t srcLimit) const
-{ return doCompareCodePointOrder(start, limit - start, 
+{ return doCompareCodePointOrder(start, limit - start,
             srcText, srcStart, srcLimit - srcStart); }
  
  inline int8_t
  UnicodeString::doCaseCompare(int32_t start,
-                             int32_t length,
+                             int32_t thisLength,
                               const UnicodeString &srcText,
                               int32_t srcStart,
                               int32_t srcLength,
@@ -3348,31 +3516,31 @@ UnicodeString::doCaseCompare(int32_t start,
      return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
    } else {
      srcText.pinIndices(srcStart, srcLength);
-    return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options);
+    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
    }
  }
  
-inline int8_t 
+inline int8_t
  UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
-  return doCaseCompare(0, fLength, text, 0, text.fLength, options);
+  return doCaseCompare(0, length(), text, 0, text.length(), options);
  }
  
-inline int8_t 
+inline int8_t
  UnicodeString::caseCompare(int32_t start,
                             int32_t _length,
                             const UnicodeString &srcText,
                             uint32_t options) const {
-  return doCaseCompare(start, _length, srcText, 0, srcText.fLength, options);
+  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
  }
  
-inline int8_t 
+inline int8_t
  UnicodeString::caseCompare(const UChar *srcChars,
                             int32_t srcLength,
                             uint32_t options) const {
-  return doCaseCompare(0, fLength, srcChars, 0, srcLength, options);
+  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
  }
  
-inline int8_t 
+inline int8_t
  UnicodeString::caseCompare(int32_t start,
                             int32_t _length,
                             const UnicodeString &srcText,
@@ -3390,7 +3558,7 @@ UnicodeString::caseCompare(int32_t start,
    return doCaseCompare(start, _length, srcChars, 0, _length, options);
  }
  
-inline int8_t 
+inline int8_t
  UnicodeString::caseCompare(int32_t start,
                             int32_t _length,
                             const UChar *srcChars,
@@ -3410,7 +3578,7 @@ UnicodeString::caseCompareBetween(int32_t start,
    return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
  }
  
-inline int32_t 
+inline int32_t
  UnicodeString::indexOf(const UnicodeString& srcText,
                 int32_t srcStart,
                 int32_t srcLength,
@@ -3426,88 +3594,88 @@ UnicodeString::indexOf(const UnicodeString& srcText,
    return -1;
  }
  
-inline int32_t 
+inline int32_t
  UnicodeString::indexOf(const UnicodeString& text) const
-{ return indexOf(text, 0, text.fLength, 0, fLength); }
+{ return indexOf(text, 0, text.length(), 0, length()); }
  
-inline int32_t 
+inline int32_t
  UnicodeString::indexOf(const UnicodeString& text,
                 int32_t start) const {
    pinIndex(start);
-  return indexOf(text, 0, text.fLength, start, fLength - start);
+  return indexOf(text, 0, text.length(), start, length() - start);
  }
  
-inline int32_t 
+inline int32_t
  UnicodeString::indexOf(const UnicodeString& text,
                 int32_t start,
                 int32_t _length) const
-{ return indexOf(text, 0, text.fLength, start, _length); }
+{ return indexOf(text, 0, text.length(), start, _length); }
  
-inline int32_t 
+inline int32_t
  UnicodeString::indexOf(const UChar *srcChars,
                 int32_t srcLength,
                 int32_t start) const {
    pinIndex(start);
-  return indexOf(srcChars, 0, srcLength, start, fLength - start);
+  return indexOf(srcChars, 0, srcLength, start, length() - start);
  }
  
-inline int32_t 
+inline int32_t
  UnicodeString::indexOf(const UChar *srcChars,
                 int32_t srcLength,
                 int32_t start,
                 int32_t _length) const
  { return indexOf(srcChars, 0, srcLength, start, _length); }
  
-inline int32_t 
+inline int32_t
  UnicodeString::indexOf(UChar c,
                 int32_t start,
                 int32_t _length) const
  { return doIndexOf(c, start, _length); }
  
-inline int32_t 
+inline int32_t
  UnicodeString::indexOf(UChar32 c,
                 int32_t start,
                 int32_t _length) const
  { return doIndexOf(c, start, _length); }
  
-inline int32_t 
+inline int32_t
  UnicodeString::indexOf(UChar c) const
-{ return doIndexOf(c, 0, fLength); }
+{ return doIndexOf(c, 0, length()); }
  
-inline int32_t 
+inline int32_t
  UnicodeString::indexOf(UChar32 c) const
-{ return indexOf(c, 0, fLength); }
+{ return indexOf(c, 0, length()); }
  
-inline int32_t 
+inline int32_t
  UnicodeString::indexOf(UChar c,
                 int32_t start) const {
    pinIndex(start);
-  return doIndexOf(c, start, fLength - start);
+  return doIndexOf(c, start, length() - start);
  }
  
-inline int32_t 
+inline int32_t
  UnicodeString::indexOf(UChar32 c,
                 int32_t start) const {
    pinIndex(start);
-  return indexOf(c, start, fLength - start);
+  return indexOf(c, start, length() - start);
  }
  
-inline int32_t 
+inline int32_t
  UnicodeString::lastIndexOf(const UChar *srcChars,
                 int32_t srcLength,
                 int32_t start,
                 int32_t _length) const
  { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
  
-inline int32_t 
+inline int32_t
  UnicodeString::lastIndexOf(const UChar *srcChars,
                 int32_t srcLength,
                 int32_t start) const {
    pinIndex(start);
-  return lastIndexOf(srcChars, 0, srcLength, start, fLength - start);
+  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
  }
  
-inline int32_t 
+inline int32_t
  UnicodeString::lastIndexOf(const UnicodeString& srcText,
                 int32_t srcStart,
                 int32_t srcLength,
@@ -3523,156 +3691,156 @@ UnicodeString::lastIndexOf(const UnicodeString& srcText,
    return -1;
  }
  
-inline int32_t 
+inline int32_t
  UnicodeString::lastIndexOf(const UnicodeString& text,
                 int32_t start,
                 int32_t _length) const
-{ return lastIndexOf(text, 0, text.fLength, start, _length); }
+{ return lastIndexOf(text, 0, text.length(), start, _length); }
  
-inline int32_t 
+inline int32_t
  UnicodeString::lastIndexOf(const UnicodeString& text,
                 int32_t start) const {
    pinIndex(start);
-  return lastIndexOf(text, 0, text.fLength, start, fLength - start);
+  return lastIndexOf(text, 0, text.length(), start, length() - start);
  }
  
-inline int32_t 
+inline int32_t
  UnicodeString::lastIndexOf(const UnicodeString& text) const
-{ return lastIndexOf(text, 0, text.fLength, 0, fLength); }
+{ return lastIndexOf(text, 0, text.length(), 0, length()); }
  
-inline int32_t 
+inline int32_t
  UnicodeString::lastIndexOf(UChar c,
                 int32_t start,
                 int32_t _length) const
  { return doLastIndexOf(c, start, _length); }
  
-inline int32_t 
+inline int32_t
  UnicodeString::lastIndexOf(UChar32 c,
                 int32_t start,
                 int32_t _length) const {
    return doLastIndexOf(c, start, _length);
  }
  
-inline int32_t 
+inline int32_t
  UnicodeString::lastIndexOf(UChar c) const
-{ return doLastIndexOf(c, 0, fLength); }
+{ return doLastIndexOf(c, 0, length()); }
  
-inline int32_t 
+inline int32_t
  UnicodeString::lastIndexOf(UChar32 c) const {
-  return lastIndexOf(c, 0, fLength);
+  return lastIndexOf(c, 0, length());
  }
  
-inline int32_t 
+inline int32_t
  UnicodeString::lastIndexOf(UChar c,
                 int32_t start) const {
    pinIndex(start);
-  return doLastIndexOf(c, start, fLength - start);
+  return doLastIndexOf(c, start, length() - start);
  }
  
-inline int32_t 
+inline int32_t
  UnicodeString::lastIndexOf(UChar32 c,
                 int32_t start) const {
    pinIndex(start);
-  return lastIndexOf(c, start, fLength - start);
+  return lastIndexOf(c, start, length() - start);
  }
  
-inline UBool 
+inline UBool
  UnicodeString::startsWith(const UnicodeString& text) const
-{ return compare(0, text.fLength, text, 0, text.fLength) == 0; }
+{ return compare(0, text.length(), text, 0, text.length()) == 0; }
  
-inline UBool 
+inline UBool
  UnicodeString::startsWith(const UnicodeString& srcText,
                int32_t srcStart,
                int32_t srcLength) const
  { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
  
-inline UBool 
+inline UBool
  UnicodeString::startsWith(const UChar *srcChars,
                int32_t srcLength) const
  { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }
  
-inline UBool 
+inline UBool
  UnicodeString::startsWith(const UChar *srcChars,
                int32_t srcStart,
                int32_t srcLength) const
  { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}
  
-inline UBool 
+inline UBool
  UnicodeString::endsWith(const UnicodeString& text) const
-{ return doCompare(fLength - text.fLength, text.fLength, 
-           text, 0, text.fLength) == 0; }
+{ return doCompare(length() - text.length(), text.length(),
+           text, 0, text.length()) == 0; }
  
-inline UBool 
+inline UBool
  UnicodeString::endsWith(const UnicodeString& srcText,
              int32_t srcStart,
              int32_t srcLength) const {
    srcText.pinIndices(srcStart, srcLength);
-  return doCompare(fLength - srcLength, srcLength, 
+  return doCompare(length() - srcLength, srcLength,
                     srcText, srcStart, srcLength) == 0;
  }
  
-inline UBool 
+inline UBool
  UnicodeString::endsWith(const UChar *srcChars,
              int32_t srcLength) const {
    if(srcLength < 0) {
      srcLength = u_strlen(srcChars);
    }
-  return doCompare(fLength - srcLength, srcLength, 
+  return doCompare(length() - srcLength, srcLength,
                     srcChars, 0, srcLength) == 0;
  }
  
-inline UBool 
+inline UBool
  UnicodeString::endsWith(const UChar *srcChars,
              int32_t srcStart,
              int32_t srcLength) const {
    if(srcLength < 0) {
      srcLength = u_strlen(srcChars + srcStart);
    }
-  return doCompare(fLength - srcLength, srcLength, 
+  return doCompare(length() - srcLength, srcLength,
                     srcChars, srcStart, srcLength) == 0;
  }
  
  //========================================
  // replace
  //========================================
-inline UnicodeString& 
-UnicodeString::replace(int32_t start, 
-               int32_t _length, 
-               const UnicodeString& srcText) 
-{ return doReplace(start, _length, srcText, 0, srcText.fLength); }
-
-inline UnicodeString& 
-UnicodeString::replace(int32_t start, 
-               int32_t _length, 
-               const UnicodeString& srcText, 
-               int32_t srcStart, 
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               const UnicodeString& srcText)
+{ return doReplace(start, _length, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               const UnicodeString& srcText,
+               int32_t srcStart,
                 int32_t srcLength)
  { return doReplace(start, _length, srcText, srcStart, srcLength); }
  
-inline UnicodeString& 
-UnicodeString::replace(int32_t start, 
-               int32_t _length, 
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
                 const UChar *srcChars,
                 int32_t srcLength)
  { return doReplace(start, _length, srcChars, 0, srcLength); }
  
-inline UnicodeString& 
-UnicodeString::replace(int32_t start, 
-               int32_t _length, 
-               const UChar *srcChars, 
-               int32_t srcStart, 
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
+               const UChar *srcChars,
+               int32_t srcStart,
                 int32_t srcLength)
  { return doReplace(start, _length, srcChars, srcStart, srcLength); }
  
-inline UnicodeString& 
-UnicodeString::replace(int32_t start, 
-               int32_t _length, 
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+               int32_t _length,
                 UChar srcChar)
  { return doReplace(start, _length, &srcChar, 0, 1); }
  
  inline UnicodeString&
-UnicodeString::replace(int32_t start, 
-               int32_t _length, 
+UnicodeString::replace(int32_t start,
+               int32_t _length,
                 UChar32 srcChar) {
    UChar buffer[U16_MAX_LENGTH];
    int32_t count = 0;
@@ -3681,33 +3849,33 @@ UnicodeString::replace(int32_t start,
    return doReplace(start, _length, buffer, 0, count);
  }
  
-inline UnicodeString& 
-UnicodeString::replaceBetween(int32_t start, 
-                  int32_t limit, 
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start,
+                  int32_t limit,
                    const UnicodeString& srcText)
-{ return doReplace(start, limit - start, srcText, 0, srcText.fLength); }
+{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
  
  inline UnicodeString&
-UnicodeString::replaceBetween(int32_t start, 
-                  int32_t limit, 
-                  const UnicodeString& srcText, 
-                  int32_t srcStart, 
+UnicodeString::replaceBetween(int32_t start,
+                  int32_t limit,
+                  const UnicodeString& srcText,
+                  int32_t srcStart,
                    int32_t srcLimit)
  { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::findAndReplace(const UnicodeString& oldText,
                    const UnicodeString& newText)
-{ return findAndReplace(0, fLength, oldText, 0, oldText.fLength, 
-            newText, 0, newText.fLength); }
+{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
+            newText, 0, newText.length()); }
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::findAndReplace(int32_t start,
                    int32_t _length,
                    const UnicodeString& oldText,
                    const UnicodeString& newText)
-{ return findAndReplace(start, _length, oldText, 0, oldText.fLength, 
-            newText, 0, newText.fLength); }
+{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
+            newText, 0, newText.length()); }
  
  // ============================
  // extract
@@ -3716,21 +3884,23 @@ inline void
  UnicodeString::doExtract(int32_t start,
               int32_t _length,
               UnicodeString& target) const
-{ target.replace(0, target.fLength, *this, start, _length); }
+{ target.replace(0, target.length(), *this, start, _length); }
  
-inline void  
-UnicodeString::extract(int32_t start, 
-               int32_t _length, 
-               UChar *target, 
+inline void
+UnicodeString::extract(int32_t start,
+               int32_t _length,
+               UChar *target,
                 int32_t targetStart) const
  { doExtract(start, _length, target, targetStart); }
  
-inline void 
+inline void
  UnicodeString::extract(int32_t start,
                 int32_t _length,
                 UnicodeString& target) const
  { doExtract(start, _length, target); }
  
+#if !UCONFIG_NO_CONVERSION
+
  inline int32_t
  UnicodeString::extract(int32_t start,
                 int32_t _length,
@@ -3742,18 +3912,23 @@ UnicodeString::extract(int32_t start,
    return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
  }
  
-inline void  
-UnicodeString::extractBetween(int32_t start, 
-                  int32_t limit, 
-                  UChar *dst, 
-                  int32_t dstStart) const
-{ doExtract(start, limit - start, dst, dstStart); }
+#endif
+
+inline void
+UnicodeString::extractBetween(int32_t start,
+                  int32_t limit,
+                  UChar *dst,
+                  int32_t dstStart) const {
+  pinIndex(start);
+  pinIndex(limit);
+  doExtract(start, limit - start, dst, dstStart);
+}
  
  inline UChar
  UnicodeString::doCharAt(int32_t offset) const
  {
-  if((uint32_t)offset < (uint32_t)fLength) {
-    return fArray[offset];
+  if((uint32_t)offset < (uint32_t)length()) {
+    return getArrayStart()[offset];
    } else {
      return kInvalidUChar;
    }
@@ -3770,9 +3945,11 @@ UnicodeString::operator[] (int32_t offset) const
  inline UChar32
  UnicodeString::char32At(int32_t offset) const
  {
-  if((uint32_t)offset < (uint32_t)fLength) {
+  int32_t len = length();
+  if((uint32_t)offset < (uint32_t)len) {
+    const UChar *array = getArrayStart();
      UChar32 c;
-    U16_GET(fArray, 0, offset, fLength, c);
+    U16_GET(array, 0, offset, len, c);
      return c;
    } else {
      return kInvalidUChar;
@@ -3781,8 +3958,9 @@ UnicodeString::char32At(int32_t offset) const
  
  inline int32_t
  UnicodeString::getChar32Start(int32_t offset) const {
-  if((uint32_t)offset < (uint32_t)fLength) {
-    U16_SET_CP_START(fArray, 0, offset);
+  if((uint32_t)offset < (uint32_t)length()) {
+    const UChar *array = getArrayStart();
+    U16_SET_CP_START(array, 0, offset);
      return offset;
    } else {
      return 0;
@@ -3791,232 +3969,262 @@ UnicodeString::getChar32Start(int32_t offset) const {
  
  inline int32_t
  UnicodeString::getChar32Limit(int32_t offset) const {
-  if((uint32_t)offset < (uint32_t)fLength) {
-    U16_SET_CP_LIMIT(fArray, 0, offset, fLength);
+  int32_t len = length();
+  if((uint32_t)offset < (uint32_t)len) {
+    const UChar *array = getArrayStart();
+    U16_SET_CP_LIMIT(array, 0, offset, len);
      return offset;
    } else {
-    return fLength;
+    return len;
    }
  }
  
  inline UBool
  UnicodeString::isEmpty() const {
-  return fLength == 0;
+  return fShortLength == 0;
  }
  
  //========================================
  // Write implementation methods
  //========================================
+inline void
+UnicodeString::setLength(int32_t len) {
+  if(len <= 127) {
+    fShortLength = (int8_t)len;
+  } else {
+    fShortLength = (int8_t)-1;
+    fUnion.fFields.fLength = len;
+  }
+}
+
+inline void
+UnicodeString::setToEmpty() {
+  fShortLength = 0;
+  fFlags = kShortString;
+}
+
+inline void
+UnicodeString::setToStackBuffer(int32_t len) {
+  fShortLength = (int8_t)len;
+  fFlags = kShortString;
+}
+
+inline void
+UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
+  setLength(len);
+  fUnion.fFields.fArray = array;
+  fUnion.fFields.fCapacity = capacity;
+}
+
  inline const UChar *
  UnicodeString::getTerminatedBuffer() {
-  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
+  if(!isWritable()) {
      return 0;
-  } else if(fLength<fCapacity && fArray[fLength]==0) {
-    return fArray;
-  } else if(cloneArrayIfNeeded(fLength+1)) {
-    fArray[fLength]=0;
-    return fArray;
    } else {
-    return 0;
+    UChar *array = getArrayStart();
+    int32_t len = length();
+    if(len < getCapacity() && array[len] == 0) {
+      return array;
+    } else if(cloneArrayIfNeeded(len+1)) {
+      array = getArrayStart();
+      array[len] = 0;
+      return array;
+    } else {
+      return 0;
+    }
    }
  }
  
-inline UnicodeString& 
-UnicodeString::operator= (UChar ch) 
-{ return doReplace(0, fLength, &ch, 0, 1); }
+inline UnicodeString&
+UnicodeString::operator= (UChar ch)
+{ return doReplace(0, length(), &ch, 0, 1); }
  
-inline UnicodeString& 
-UnicodeString::operator= (UChar32 ch) 
-{ return replace(0, fLength, ch); }
+inline UnicodeString&
+UnicodeString::operator= (UChar32 ch)
+{ return replace(0, length(), ch); }
  
-inline UnicodeString& 
-UnicodeString::setTo(const UnicodeString& srcText, 
-             int32_t srcStart, 
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText,
+             int32_t srcStart,
               int32_t srcLength)
  {
    unBogus();
-  return doReplace(0, fLength, srcText, srcStart, srcLength);
+  return doReplace(0, length(), srcText, srcStart, srcLength);
  }
  
-inline UnicodeString& 
-UnicodeString::setTo(const UnicodeString& srcText, 
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText,
               int32_t srcStart)
  {
    unBogus();
    srcText.pinIndex(srcStart);
-  return doReplace(0, fLength, srcText, srcStart, srcText.fLength - srcStart);
+  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
  }
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::setTo(const UnicodeString& srcText)
  {
    unBogus();
-  return doReplace(0, fLength, srcText, 0, srcText.fLength);
+  return doReplace(0, length(), srcText, 0, srcText.length());
  }
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::setTo(const UChar *srcChars,
               int32_t srcLength)
  {
    unBogus();
-  return doReplace(0, fLength, srcChars, 0, srcLength);
+  return doReplace(0, length(), srcChars, 0, srcLength);
  }
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::setTo(UChar srcChar)
  {
    unBogus();
-  return doReplace(0, fLength, &srcChar, 0, 1);
+  return doReplace(0, length(), &srcChar, 0, 1);
  }
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::setTo(UChar32 srcChar)
  {
    unBogus();
-  return replace(0, fLength, srcChar);
+  return replace(0, length(), srcChar);
  }
  
-inline UnicodeString& 
-UnicodeString::operator+= (UChar ch)
-{ return doReplace(fLength, 0, &ch, 0, 1); }
-
-inline UnicodeString& 
-UnicodeString::operator+= (UChar32 ch) {
-  UChar buffer[U16_MAX_LENGTH];
-  int32_t _length = 0;
-  UBool isError = FALSE;
-  U16_APPEND(buffer, _length, U16_MAX_LENGTH, ch, isError);
-  return doReplace(fLength, 0, buffer, 0, _length);
-}
-
-inline UnicodeString& 
-UnicodeString::operator+= (const UnicodeString& srcText)
-{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
-
-inline UnicodeString& 
-UnicodeString::append(const UnicodeString& srcText, 
-              int32_t srcStart, 
+inline UnicodeString&
+UnicodeString::append(const UnicodeString& srcText,
+              int32_t srcStart,
                int32_t srcLength)
-{ return doReplace(fLength, 0, srcText, srcStart, srcLength); }
+{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::append(const UnicodeString& srcText)
-{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
+{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
  
-inline UnicodeString& 
-UnicodeString::append(const UChar *srcChars, 
-              int32_t srcStart, 
+inline UnicodeString&
+UnicodeString::append(const UChar *srcChars,
+              int32_t srcStart,
                int32_t srcLength)
-{ return doReplace(fLength, 0, srcChars, srcStart, srcLength); }
+{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::append(const UChar *srcChars,
                int32_t srcLength)
-{ return doReplace(fLength, 0, srcChars, 0, srcLength); }
+{ return doReplace(length(), 0, srcChars, 0, srcLength); }
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::append(UChar srcChar)
-{ return doReplace(fLength, 0, &srcChar, 0, 1); }
+{ return doReplace(length(), 0, &srcChar, 0, 1); }
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::append(UChar32 srcChar) {
    UChar buffer[U16_MAX_LENGTH];
    int32_t _length = 0;
    UBool isError = FALSE;
    U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
-  return doReplace(fLength, 0, buffer, 0, _length);
+  return doReplace(length(), 0, buffer, 0, _length);
+}
+
+inline UnicodeString&
+UnicodeString::operator+= (UChar ch)
+{ return doReplace(length(), 0, &ch, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::operator+= (UChar32 ch) {
+  return append(ch);
  }
  
-inline UnicodeString& 
-UnicodeString::insert(int32_t start, 
-              const UnicodeString& srcText, 
-              int32_t srcStart, 
+inline UnicodeString&
+UnicodeString::operator+= (const UnicodeString& srcText)
+{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              const UnicodeString& srcText,
+              int32_t srcStart,
                int32_t srcLength)
  { return doReplace(start, 0, srcText, srcStart, srcLength); }
  
-inline UnicodeString& 
-UnicodeString::insert(int32_t start, 
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
                const UnicodeString& srcText)
-{ return doReplace(start, 0, srcText, 0, srcText.fLength); }
+{ return doReplace(start, 0, srcText, 0, srcText.length()); }
  
-inline UnicodeString& 
-UnicodeString::insert(int32_t start, 
-              const UChar *srcChars, 
-              int32_t srcStart, 
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+              const UChar *srcChars,
+              int32_t srcStart,
                int32_t srcLength)
  { return doReplace(start, 0, srcChars, srcStart, srcLength); }
  
-inline UnicodeString& 
-UnicodeString::insert(int32_t start, 
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
                const UChar *srcChars,
                int32_t srcLength)
  { return doReplace(start, 0, srcChars, 0, srcLength); }
  
-inline UnicodeString& 
-UnicodeString::insert(int32_t start, 
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
                UChar srcChar)
  { return doReplace(start, 0, &srcChar, 0, 1); }
  
-inline UnicodeString& 
-UnicodeString::insert(int32_t start, 
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
                UChar32 srcChar)
  { return replace(start, 0, srcChar); }
  
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::remove()
  {
    // remove() of a bogus string makes the string empty and non-bogus
    if(isBogus()) {
      unBogus();
    } else {
-    fLength = 0;
+    setLength(0);
    }
    return *this;
  }
  
-inline UnicodeString& 
-UnicodeString::remove(int32_t start, 
+inline UnicodeString&
+UnicodeString::remove(int32_t start,
               int32_t _length)
  {
-  if(start <= 0 && _length == INT32_MAX) {
-    // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
-    return remove();
-  } else {
+    if(start <= 0 && _length == INT32_MAX) {
+        // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
+        return remove();
+    }
      return doReplace(start, _length, NULL, 0, 0);
-  }
  }
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::removeBetween(int32_t start,
                  int32_t limit)
  { return doReplace(start, limit - start, NULL, 0, 0); }
  
-inline UBool 
+inline UBool
  UnicodeString::truncate(int32_t targetLength)
  {
    if(isBogus() && targetLength == 0) {
      // truncate(0) of a bogus string makes the string empty and non-bogus
      unBogus();
      return FALSE;
-  } else if((uint32_t)targetLength < (uint32_t)fLength) {
-    fLength = targetLength;
+  } else if((uint32_t)targetLength < (uint32_t)length()) {
+    setLength(targetLength);
      return TRUE;
    } else {
      return FALSE;
    }
  }
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::reverse()
-{ return doReverse(0, fLength); }
+{ return doReverse(0, length()); }
  
-inline UnicodeString& 
+inline UnicodeString&
  UnicodeString::reverse(int32_t start,
                 int32_t _length)
  { return doReverse(start, _length); }
  
-
  U_NAMESPACE_END
  
  #endif