ICU-400.42.tar.gz

[apple/icu.git] / icuSources / common / unicode / unistr.h
diff --git a/icuSources/common/unicode/unistr.h b/icuSources/common/unicode/unistr.h

index 3827458bfb9df9e3b67b240937200d883b1f7071..9a96bdc27662374685ce76468f639b1e86ed78a6 100644 (file)
--- a/icuSources/common/unicode/unistr.h
+++ b/icuSources/common/unicode/unistr.h
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 1998-2004, International Business Machines
+*   Copyright (C) 1998-2008, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *
@@ -21,6 +21,11 @@
  #ifndef UNISTR_H
  #define UNISTR_H
  
+/**
+ * \file 
+ * \brief C++ API: Unicode String 
+ */
+
  #include "unicode/rep.h"
  
  struct UConverter;          // unicode/ucnv.h
@@ -37,7 +42,9 @@ class  StringThreadTest;
  #endif
  
  #ifndef USTRING_H
-/* see ustring.h */
+/**
+ * \ingroup ustring_ustrlen
+ */
  U_STABLE int32_t U_EXPORT2
  u_strlen(const UChar *s);
  #endif
@@ -58,9 +65,9 @@ class BreakIterator;        // unicode/brkiter.h
   * therefore recommended over ones taking a charset name string
   * (where the empty string "" indicates invariant-character conversion).
   *
- * @draft ICU 3.2
+ * @stable ICU 3.2
   */
-#define US_INV UnicodeString::kInvariant
+#define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
  
  /**
   * Unicode String literals in C++.
@@ -79,12 +86,14 @@ class BreakIterator;        // unicode/brkiter.h
   * such string variable before it is used.
   * @stable ICU 2.0
   */
-#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
-#   define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)L ## cs, _length)
+#if defined(U_DECLARE_UTF16)
+#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
+#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
  #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
-#   define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)cs, _length)
+#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
  #else
-#   define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, US_INV)
+#   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
  #endif
  
  /**
@@ -100,13 +109,7 @@ class BreakIterator;        // unicode/brkiter.h
   * The string parameter must be a C string literal.
   * @stable ICU 2.0
   */
-#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
-#   define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)L ## cs, -1)
-#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
-#   define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)cs, -1)
-#else
-#   define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, -1, US_INV)
-#endif
+#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
  
  /**
   * UnicodeString is a string class that stores Unicode characters directly and provides
@@ -116,13 +119,12 @@ class BreakIterator;        // unicode/brkiter.h
   * The UnicodeString class is not suitable for subclassing.
   *
   * <p>For an overview of Unicode strings in C and C++ see the
- * <a href="http://oss.software.ibm.com/icu/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
   *
   * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
- * A Unicode character may be stored with either
- * one code unit &#8212; which is the most common case &#8212; or with a matched pair of
- * special code units ("surrogates").
- * The data type for code units is UChar.<br>
+ * A Unicode character may be stored with either one code unit
+ * (the most common case) or with a matched pair of special code units
+ * ("surrogates"). The data type for code units is UChar. 
   * For single-character handling, a Unicode character code <em>point</em> is a value
   * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
   *
@@ -172,7 +174,7 @@ class BreakIterator;        // unicode/brkiter.h
   * significant performance improvements.
   * Also, the internal buffer is accessible via special functions.
   * For details see the
- * <a href="http://oss.software.ibm.com/icu/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
   *
   * @see utf.h
   * @see CharacterIterator
@@ -188,12 +190,12 @@ public:
     * Use the macro US_INV instead of the full qualification for this value.
     *
     * @see US_INV
-   * @draft ICU 3.2
+   * @stable ICU 3.2
     */
    enum EInvariant {
      /**
       * @see EInvariant
-     * @draft ICU 3.2
+     * @stable ICU 3.2
       */
      kInvariant
    };
@@ -393,7 +395,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -412,7 +414,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -435,7 +437,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -462,7 +464,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -483,7 +485,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -506,7 +508,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -533,7 +535,7 @@ public:
  
    /**
     * Compare two Unicode strings in code point order.
-   * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+   * The result may be different from the results of compare(), operator<, etc.
     * if supplementary characters are present:
     *
     * In UTF-16, supplementary characters (with code points U+10000 and above) are
@@ -1434,7 +1436,7 @@ public:
     * @param targetCapacity the length of the target buffer
     * @param inv Signature-distinguishing parameter, use US_INV.
     * @return the output string length, not including the terminating NUL
-   * @draft ICU 3.2
+   * @stable ICU 3.2
     */
    int32_t extract(int32_t start,
             int32_t startLength,
@@ -2428,7 +2430,7 @@ public:
     * The standard titlecase iterator for the root locale implements the
     * algorithm of Unicode TR 21.
     *
-   * This function uses only the first() and next() methods of the
+   * This function uses only the setText(), first() and next() methods of the
     * provided break iterator.
     *
     * @param titleIter A break iterator to find the first characters of words
@@ -2456,7 +2458,7 @@ public:
     * The standard titlecase iterator for the root locale implements the
     * algorithm of Unicode TR 21.
     *
-   * This function uses only the first() and next() methods of the
+   * This function uses only the setText(), first() and next() methods of the
     * provided break iterator.
     *
     * @param titleIter A break iterator to find the first characters of words
@@ -2470,6 +2472,39 @@ public:
     */
    UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
  
+  /**
+   * Titlecase this string, with options.
+   *
+   * Casing is locale-dependent and context-sensitive.
+   * Titlecasing uses a break iterator to find the first characters of words
+   * that are to be titlecased. It titlecases those characters and lowercases
+   * all others. (This can be modified with options.)
+   *
+   * The titlecase break iterator can be provided to customize for arbitrary
+   * styles, using rules and dictionaries beyond the standard iterators.
+   * It may be more efficient to always provide an iterator to avoid
+   * opening and closing one for each string.
+   * The standard titlecase iterator for the root locale implements the
+   * algorithm of Unicode TR 21.
+   *
+   * This function uses only the setText(), first() and next() methods of the
+   * provided break iterator.
+   *
+   * @param titleIter A break iterator to find the first characters of words
+   *                  that are to be titlecased.
+   *                  If none is provided (0), then a standard titlecase
+   *                  break iterator is opened.
+   *                  Otherwise the provided iterator is set to the string's text.
+   * @param locale    The locale to consider.
+   * @param options Options bit set, see ucasemap_open().
+   * @return A reference to this.
+   * @see U_TITLECASE_NO_LOWERCASE
+   * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+   * @see ucasemap_open
+   * @stable ICU 4.0
+   */
+  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
+
  #endif
  
    /**
@@ -2813,7 +2848,7 @@ public:
     * @param inv Signature-distinguishing parameter, use US_INV.
     *
     * @see US_INV
-   * @draft ICU 3.2
+   * @stable ICU 3.2
     */
    UnicodeString(const char *src, int32_t length, enum EInvariant inv);
  
@@ -3054,9 +3089,23 @@ private:
    int32_t doHashCode(void) const;
  
    // get pointer to start of array
+  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
    inline UChar* getArrayStart(void);
    inline const UChar* getArrayStart(void) const;
  
+  // A UnicodeString object (not necessarily its current buffer)
+  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
+  inline UBool isWritable() const;
+
+  // Is the current buffer writable?
+  inline UBool isBufferWritable() const;
+
+  // None of the following does releaseArray().
+  inline void setLength(int32_t len);        // sets only fShortLength and fLength
+  inline void setToEmpty();                  // sets fFlags=kShortString
+  inline void setToStackBuffer(int32_t len); // sets fFlags=kShortString
+  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
+
    // allocate the array; result may be fStackBuffer
    // sets refCount to 1 if appropriate
    // sets fArray, fCapacity, and fFlags
@@ -3142,7 +3191,10 @@ private:
  
    // constants
    enum {
-    US_STACKBUF_SIZE=7, // Size of stack buffer for small strings
+    // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer):
+    // 32-bit pointers: 4+1+1+13*2 = 32 bytes
+    // 64-bit pointers: 8+1+1+15*2 = 40 bytes
+    US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings
      kInvalidUChar=0xffff, // invalid UChar index
      kGrowSize=128, // grow size for this buffer
      kInvalidHashCode=0, // invalid hash code
@@ -3163,9 +3215,11 @@ private:
      kWritableAlias=0
    };
  
-  friend class StringCharacterIterator;
    friend class StringThreadTest;
  
+  union StackBufferOrFields;        // forward declaration necessary before friend declaration
+  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
+
    /*
     * The following are all the class fields that are stored
     * in each UnicodeString object.
@@ -3178,12 +3232,19 @@ private:
     * on 64-bit machines (8-byte pointers), it should be 40 bytes.
     */
    // (implicit) *vtable;
-  int32_t   fLength;        // number of characters in fArray
-  int32_t   fCapacity;      // sizeof fArray
-  UChar     *fArray;        // the Unicode data
-  uint16_t  fFlags;         // bit flags: see constants above
-  UChar     fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
-
+  int8_t    fShortLength;   // 0..127: length  <0: real length is in fUnion.fFields.fLength
+  uint8_t   fFlags;         // bit flags: see constants above
+  union StackBufferOrFields {
+    // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
+    // else fFields is used
+    UChar     fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings
+    struct {
+      uint16_t  fPadding;   // align the following field at 8B (32b pointers) or 12B (64b)
+      int32_t   fLength;    // number of characters in fArray if >127; else undefined
+      UChar     *fArray;    // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))
+      int32_t   fCapacity;  // sizeof fArray
+    } fFields;
+  } fUnion;
  };
  
  /**
@@ -3192,37 +3253,11 @@ private:
   * @param s1 The first string to be copied to the new one.
   * @param s2 The second string to be copied to the new one, after s1.
   * @return UnicodeString(s1).append(s2)
- * @draft ICU 2.8
+ * @stable ICU 2.8
   */
  U_COMMON_API UnicodeString U_EXPORT2
  operator+ (const UnicodeString &s1, const UnicodeString &s2);
  
-U_NAMESPACE_END
-
-// inline implementations -------------------------------------------------- ***
-
-//========================================
-// Array copying
-//========================================
-/**
- * Copy an array of UnicodeString OBJECTS (not pointers).
- * @internal
- */
-inline void
-uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t count)
-{ while(count-- > 0) *dst++ = *src++; }
-
-/**
- * Copy an array of UnicodeString OBJECTS (not pointers).
- * @internal
- */
-inline void
-uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, int32_t srcStart,
-        U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t dstStart, int32_t count)
-{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); }
-
-U_NAMESPACE_BEGIN
-
  //========================================
  // Inline members
  //========================================
@@ -3237,8 +3272,8 @@ UnicodeString::pinIndex(int32_t& start) const
    // pin index
    if(start < 0) {
      start = 0;
-  } else if(start > fLength) {
-    start = fLength;
+  } else if(start > length()) {
+    start = length();
    }
  }
  
@@ -3247,36 +3282,37 @@ UnicodeString::pinIndices(int32_t& start,
                            int32_t& _length) const
  {
    // pin indices
+  int32_t len = length();
    if(start < 0) {
      start = 0;
-  } else if(start > fLength) {
-    start = fLength;
+  } else if(start > len) {
+    start = len;
    }
    if(_length < 0) {
      _length = 0;
-  } else if(_length > (fLength - start)) {
-    _length = (fLength - start);
+  } else if(_length > (len - start)) {
+    _length = (len - start);
    }
  }
  
  inline UChar*
  UnicodeString::getArrayStart()
-{ return fArray; }
+{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
  
  inline const UChar*
  UnicodeString::getArrayStart() const
-{ return fArray; }
+{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
  
  //========================================
  // Read-only implementation methods
  //========================================
  inline int32_t
  UnicodeString::length() const
-{ return fLength; }
+{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
  
  inline int32_t
  UnicodeString::getCapacity() const
-{ return fCapacity; }
+{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
  
  inline int32_t
  UnicodeString::hashCode() const
@@ -3286,12 +3322,26 @@ inline UBool
  UnicodeString::isBogus() const
  { return (UBool)(fFlags & kIsBogus); }
  
+inline UBool
+UnicodeString::isWritable() const
+{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
+
+inline UBool
+UnicodeString::isBufferWritable() const
+{
+  return (UBool)(
+      !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
+      (!(fFlags&kRefCounted) || refCount()==1));
+}
+
  inline const UChar *
  UnicodeString::getBuffer() const {
-  if(!(fFlags&(kIsBogus|kOpenGetBuffer))) {
-    return fArray;
-  } else {
+  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
      return 0;
+  } else if(fFlags&kUsingStackBuffer) {
+    return fUnion.fStackBuffer;
+  } else {
+    return fUnion.fFields.fArray;
    }
  }
  
@@ -3300,7 +3350,7 @@ UnicodeString::getBuffer() const {
  //========================================
  inline int8_t
  UnicodeString::doCompare(int32_t start,
-              int32_t length,
+              int32_t thisLength,
                const UnicodeString& srcText,
                int32_t srcStart,
                int32_t srcLength) const
@@ -3309,7 +3359,7 @@ UnicodeString::doCompare(int32_t start,
      return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
    } else {
      srcText.pinIndices(srcStart, srcLength);
-    return doCompare(start, length, srcText.fArray, srcStart, srcLength);
+    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
    }
  }
  
@@ -3319,10 +3369,11 @@ UnicodeString::operator== (const UnicodeString& text) const
    if(isBogus()) {
      return text.isBogus();
    } else {
+    int32_t len = length(), textLength = text.length();
      return
        !text.isBogus() &&
-      fLength == text.fLength &&
-      doCompare(0, fLength, text, 0, text.fLength) == 0;
+      len == textLength &&
+      doCompare(0, len, text, 0, textLength) == 0;
    }
  }
  
@@ -3332,34 +3383,34 @@ UnicodeString::operator!= (const UnicodeString& text) const
  
  inline UBool
  UnicodeString::operator> (const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength) == 1; }
+{ return doCompare(0, length(), text, 0, text.length()) == 1; }
  
  inline UBool
  UnicodeString::operator< (const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength) == -1; }
+{ return doCompare(0, length(), text, 0, text.length()) == -1; }
  
  inline UBool
  UnicodeString::operator>= (const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength) != -1; }
+{ return doCompare(0, length(), text, 0, text.length()) != -1; }
  
  inline UBool
  UnicodeString::operator<= (const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength) != 1; }
+{ return doCompare(0, length(), text, 0, text.length()) != 1; }
  
  inline int8_t
  UnicodeString::compare(const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength); }
+{ return doCompare(0, length(), text, 0, text.length()); }
  
  inline int8_t
  UnicodeString::compare(int32_t start,
                 int32_t _length,
                 const UnicodeString& srcText) const
-{ return doCompare(start, _length, srcText, 0, srcText.fLength); }
+{ return doCompare(start, _length, srcText, 0, srcText.length()); }
  
  inline int8_t
  UnicodeString::compare(const UChar *srcChars,
                 int32_t srcLength) const
-{ return doCompare(0, fLength, srcChars, 0, srcLength); }
+{ return doCompare(0, length(), srcChars, 0, srcLength); }
  
  inline int8_t
  UnicodeString::compare(int32_t start,
@@ -3394,7 +3445,7 @@ UnicodeString::compareBetween(int32_t start,
  
  inline int8_t
  UnicodeString::doCompareCodePointOrder(int32_t start,
-                                       int32_t length,
+                                       int32_t thisLength,
                                         const UnicodeString& srcText,
                                         int32_t srcStart,
                                         int32_t srcLength) const
@@ -3403,24 +3454,24 @@ UnicodeString::doCompareCodePointOrder(int32_t start,
      return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
    } else {
      srcText.pinIndices(srcStart, srcLength);
-    return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength);
+    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
    }
  }
  
  inline int8_t
  UnicodeString::compareCodePointOrder(const UnicodeString& text) const
-{ return doCompareCodePointOrder(0, fLength, text, 0, text.fLength); }
+{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
  
  inline int8_t
  UnicodeString::compareCodePointOrder(int32_t start,
                                       int32_t _length,
                                       const UnicodeString& srcText) const
-{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.fLength); }
+{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
  
  inline int8_t
  UnicodeString::compareCodePointOrder(const UChar *srcChars,
                                       int32_t srcLength) const
-{ return doCompareCodePointOrder(0, fLength, srcChars, 0, srcLength); }
+{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
  
  inline int8_t
  UnicodeString::compareCodePointOrder(int32_t start,
@@ -3455,7 +3506,7 @@ UnicodeString::compareCodePointOrderBetween(int32_t start,
  
  inline int8_t
  UnicodeString::doCaseCompare(int32_t start,
-                             int32_t length,
+                             int32_t thisLength,
                               const UnicodeString &srcText,
                               int32_t srcStart,
                               int32_t srcLength,
@@ -3465,13 +3516,13 @@ UnicodeString::doCaseCompare(int32_t start,
      return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
    } else {
      srcText.pinIndices(srcStart, srcLength);
-    return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options);
+    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
    }
  }
  
  inline int8_t
  UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
-  return doCaseCompare(0, fLength, text, 0, text.fLength, options);
+  return doCaseCompare(0, length(), text, 0, text.length(), options);
  }
  
  inline int8_t
@@ -3479,14 +3530,14 @@ UnicodeString::caseCompare(int32_t start,
                             int32_t _length,
                             const UnicodeString &srcText,
                             uint32_t options) const {
-  return doCaseCompare(start, _length, srcText, 0, srcText.fLength, options);
+  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
  }
  
  inline int8_t
  UnicodeString::caseCompare(const UChar *srcChars,
                             int32_t srcLength,
                             uint32_t options) const {
-  return doCaseCompare(0, fLength, srcChars, 0, srcLength, options);
+  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
  }
  
  inline int8_t
@@ -3545,27 +3596,27 @@ UnicodeString::indexOf(const UnicodeString& srcText,
  
  inline int32_t
  UnicodeString::indexOf(const UnicodeString& text) const
-{ return indexOf(text, 0, text.fLength, 0, fLength); }
+{ return indexOf(text, 0, text.length(), 0, length()); }
  
  inline int32_t
  UnicodeString::indexOf(const UnicodeString& text,
                 int32_t start) const {
    pinIndex(start);
-  return indexOf(text, 0, text.fLength, start, fLength - start);
+  return indexOf(text, 0, text.length(), start, length() - start);
  }
  
  inline int32_t
  UnicodeString::indexOf(const UnicodeString& text,
                 int32_t start,
                 int32_t _length) const
-{ return indexOf(text, 0, text.fLength, start, _length); }
+{ return indexOf(text, 0, text.length(), start, _length); }
  
  inline int32_t
  UnicodeString::indexOf(const UChar *srcChars,
                 int32_t srcLength,
                 int32_t start) const {
    pinIndex(start);
-  return indexOf(srcChars, 0, srcLength, start, fLength - start);
+  return indexOf(srcChars, 0, srcLength, start, length() - start);
  }
  
  inline int32_t
@@ -3589,24 +3640,24 @@ UnicodeString::indexOf(UChar32 c,
  
  inline int32_t
  UnicodeString::indexOf(UChar c) const
-{ return doIndexOf(c, 0, fLength); }
+{ return doIndexOf(c, 0, length()); }
  
  inline int32_t
  UnicodeString::indexOf(UChar32 c) const
-{ return indexOf(c, 0, fLength); }
+{ return indexOf(c, 0, length()); }
  
  inline int32_t
  UnicodeString::indexOf(UChar c,
                 int32_t start) const {
    pinIndex(start);
-  return doIndexOf(c, start, fLength - start);
+  return doIndexOf(c, start, length() - start);
  }
  
  inline int32_t
  UnicodeString::indexOf(UChar32 c,
                 int32_t start) const {
    pinIndex(start);
-  return indexOf(c, start, fLength - start);
+  return indexOf(c, start, length() - start);
  }
  
  inline int32_t
@@ -3621,7 +3672,7 @@ UnicodeString::lastIndexOf(const UChar *srcChars,
                 int32_t srcLength,
                 int32_t start) const {
    pinIndex(start);
-  return lastIndexOf(srcChars, 0, srcLength, start, fLength - start);
+  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
  }
  
  inline int32_t
@@ -3644,18 +3695,18 @@ inline int32_t
  UnicodeString::lastIndexOf(const UnicodeString& text,
                 int32_t start,
                 int32_t _length) const
-{ return lastIndexOf(text, 0, text.fLength, start, _length); }
+{ return lastIndexOf(text, 0, text.length(), start, _length); }
  
  inline int32_t
  UnicodeString::lastIndexOf(const UnicodeString& text,
                 int32_t start) const {
    pinIndex(start);
-  return lastIndexOf(text, 0, text.fLength, start, fLength - start);
+  return lastIndexOf(text, 0, text.length(), start, length() - start);
  }
  
  inline int32_t
  UnicodeString::lastIndexOf(const UnicodeString& text) const
-{ return lastIndexOf(text, 0, text.fLength, 0, fLength); }
+{ return lastIndexOf(text, 0, text.length(), 0, length()); }
  
  inline int32_t
  UnicodeString::lastIndexOf(UChar c,
@@ -3672,30 +3723,30 @@ UnicodeString::lastIndexOf(UChar32 c,
  
  inline int32_t
  UnicodeString::lastIndexOf(UChar c) const
-{ return doLastIndexOf(c, 0, fLength); }
+{ return doLastIndexOf(c, 0, length()); }
  
  inline int32_t
  UnicodeString::lastIndexOf(UChar32 c) const {
-  return lastIndexOf(c, 0, fLength);
+  return lastIndexOf(c, 0, length());
  }
  
  inline int32_t
  UnicodeString::lastIndexOf(UChar c,
                 int32_t start) const {
    pinIndex(start);
-  return doLastIndexOf(c, start, fLength - start);
+  return doLastIndexOf(c, start, length() - start);
  }
  
  inline int32_t
  UnicodeString::lastIndexOf(UChar32 c,
                 int32_t start) const {
    pinIndex(start);
-  return lastIndexOf(c, start, fLength - start);
+  return lastIndexOf(c, start, length() - start);
  }
  
  inline UBool
  UnicodeString::startsWith(const UnicodeString& text) const
-{ return compare(0, text.fLength, text, 0, text.fLength) == 0; }
+{ return compare(0, text.length(), text, 0, text.length()) == 0; }
  
  inline UBool
  UnicodeString::startsWith(const UnicodeString& srcText,
@@ -3716,15 +3767,15 @@ UnicodeString::startsWith(const UChar *srcChars,
  
  inline UBool
  UnicodeString::endsWith(const UnicodeString& text) const
-{ return doCompare(fLength - text.fLength, text.fLength,
-           text, 0, text.fLength) == 0; }
+{ return doCompare(length() - text.length(), text.length(),
+           text, 0, text.length()) == 0; }
  
  inline UBool
  UnicodeString::endsWith(const UnicodeString& srcText,
              int32_t srcStart,
              int32_t srcLength) const {
    srcText.pinIndices(srcStart, srcLength);
-  return doCompare(fLength - srcLength, srcLength,
+  return doCompare(length() - srcLength, srcLength,
                     srcText, srcStart, srcLength) == 0;
  }
  
@@ -3734,7 +3785,7 @@ UnicodeString::endsWith(const UChar *srcChars,
    if(srcLength < 0) {
      srcLength = u_strlen(srcChars);
    }
-  return doCompare(fLength - srcLength, srcLength,
+  return doCompare(length() - srcLength, srcLength,
                     srcChars, 0, srcLength) == 0;
  }
  
@@ -3745,7 +3796,7 @@ UnicodeString::endsWith(const UChar *srcChars,
    if(srcLength < 0) {
      srcLength = u_strlen(srcChars + srcStart);
    }
-  return doCompare(fLength - srcLength, srcLength,
+  return doCompare(length() - srcLength, srcLength,
                     srcChars, srcStart, srcLength) == 0;
  }
  
@@ -3756,7 +3807,7 @@ inline UnicodeString&
  UnicodeString::replace(int32_t start,
                 int32_t _length,
                 const UnicodeString& srcText)
-{ return doReplace(start, _length, srcText, 0, srcText.fLength); }
+{ return doReplace(start, _length, srcText, 0, srcText.length()); }
  
  inline UnicodeString&
  UnicodeString::replace(int32_t start,
@@ -3802,7 +3853,7 @@ inline UnicodeString&
  UnicodeString::replaceBetween(int32_t start,
                    int32_t limit,
                    const UnicodeString& srcText)
-{ return doReplace(start, limit - start, srcText, 0, srcText.fLength); }
+{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
  
  inline UnicodeString&
  UnicodeString::replaceBetween(int32_t start,
@@ -3815,16 +3866,16 @@ UnicodeString::replaceBetween(int32_t start,
  inline UnicodeString&
  UnicodeString::findAndReplace(const UnicodeString& oldText,
                    const UnicodeString& newText)
-{ return findAndReplace(0, fLength, oldText, 0, oldText.fLength,
-            newText, 0, newText.fLength); }
+{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
+            newText, 0, newText.length()); }
  
  inline UnicodeString&
  UnicodeString::findAndReplace(int32_t start,
                    int32_t _length,
                    const UnicodeString& oldText,
                    const UnicodeString& newText)
-{ return findAndReplace(start, _length, oldText, 0, oldText.fLength,
-            newText, 0, newText.fLength); }
+{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
+            newText, 0, newText.length()); }
  
  // ============================
  // extract
@@ -3833,7 +3884,7 @@ inline void
  UnicodeString::doExtract(int32_t start,
               int32_t _length,
               UnicodeString& target) const
-{ target.replace(0, target.fLength, *this, start, _length); }
+{ target.replace(0, target.length(), *this, start, _length); }
  
  inline void
  UnicodeString::extract(int32_t start,
@@ -3876,8 +3927,8 @@ UnicodeString::extractBetween(int32_t start,
  inline UChar
  UnicodeString::doCharAt(int32_t offset) const
  {
-  if((uint32_t)offset < (uint32_t)fLength) {
-    return fArray[offset];
+  if((uint32_t)offset < (uint32_t)length()) {
+    return getArrayStart()[offset];
    } else {
      return kInvalidUChar;
    }
@@ -3894,9 +3945,11 @@ UnicodeString::operator[] (int32_t offset) const
  inline UChar32
  UnicodeString::char32At(int32_t offset) const
  {
-  if((uint32_t)offset < (uint32_t)fLength) {
+  int32_t len = length();
+  if((uint32_t)offset < (uint32_t)len) {
+    const UChar *array = getArrayStart();
      UChar32 c;
-    U16_GET(fArray, 0, offset, fLength, c);
+    U16_GET(array, 0, offset, len, c);
      return c;
    } else {
      return kInvalidUChar;
@@ -3905,8 +3958,9 @@ UnicodeString::char32At(int32_t offset) const
  
  inline int32_t
  UnicodeString::getChar32Start(int32_t offset) const {
-  if((uint32_t)offset < (uint32_t)fLength) {
-    U16_SET_CP_START(fArray, 0, offset);
+  if((uint32_t)offset < (uint32_t)length()) {
+    const UChar *array = getArrayStart();
+    U16_SET_CP_START(array, 0, offset);
      return offset;
    } else {
      return 0;
@@ -3915,43 +3969,79 @@ UnicodeString::getChar32Start(int32_t offset) const {
  
  inline int32_t
  UnicodeString::getChar32Limit(int32_t offset) const {
-  if((uint32_t)offset < (uint32_t)fLength) {
-    U16_SET_CP_LIMIT(fArray, 0, offset, fLength);
+  int32_t len = length();
+  if((uint32_t)offset < (uint32_t)len) {
+    const UChar *array = getArrayStart();
+    U16_SET_CP_LIMIT(array, 0, offset, len);
      return offset;
    } else {
-    return fLength;
+    return len;
    }
  }
  
  inline UBool
  UnicodeString::isEmpty() const {
-  return fLength == 0;
+  return fShortLength == 0;
  }
  
  //========================================
  // Write implementation methods
  //========================================
+inline void
+UnicodeString::setLength(int32_t len) {
+  if(len <= 127) {
+    fShortLength = (int8_t)len;
+  } else {
+    fShortLength = (int8_t)-1;
+    fUnion.fFields.fLength = len;
+  }
+}
+
+inline void
+UnicodeString::setToEmpty() {
+  fShortLength = 0;
+  fFlags = kShortString;
+}
+
+inline void
+UnicodeString::setToStackBuffer(int32_t len) {
+  fShortLength = (int8_t)len;
+  fFlags = kShortString;
+}
+
+inline void
+UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
+  setLength(len);
+  fUnion.fFields.fArray = array;
+  fUnion.fFields.fCapacity = capacity;
+}
+
  inline const UChar *
  UnicodeString::getTerminatedBuffer() {
-  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
+  if(!isWritable()) {
      return 0;
-  } else if(fLength<fCapacity && fArray[fLength]==0) {
-    return fArray;
-  } else if(cloneArrayIfNeeded(fLength+1)) {
-    fArray[fLength]=0;
-    return fArray;
    } else {
-    return 0;
+    UChar *array = getArrayStart();
+    int32_t len = length();
+    if(len < getCapacity() && array[len] == 0) {
+      return array;
+    } else if(cloneArrayIfNeeded(len+1)) {
+      array = getArrayStart();
+      array[len] = 0;
+      return array;
+    } else {
+      return 0;
+    }
    }
  }
  
  inline UnicodeString&
  UnicodeString::operator= (UChar ch)
-{ return doReplace(0, fLength, &ch, 0, 1); }
+{ return doReplace(0, length(), &ch, 0, 1); }
  
  inline UnicodeString&
  UnicodeString::operator= (UChar32 ch)
-{ return replace(0, fLength, ch); }
+{ return replace(0, length(), ch); }
  
  inline UnicodeString&
  UnicodeString::setTo(const UnicodeString& srcText,
@@ -3959,7 +4049,7 @@ UnicodeString::setTo(const UnicodeString& srcText,
               int32_t srcLength)
  {
    unBogus();
-  return doReplace(0, fLength, srcText, srcStart, srcLength);
+  return doReplace(0, length(), srcText, srcStart, srcLength);
  }
  
  inline UnicodeString&
@@ -3968,14 +4058,14 @@ UnicodeString::setTo(const UnicodeString& srcText,
  {
    unBogus();
    srcText.pinIndex(srcStart);
-  return doReplace(0, fLength, srcText, srcStart, srcText.fLength - srcStart);
+  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
  }
  
  inline UnicodeString&
  UnicodeString::setTo(const UnicodeString& srcText)
  {
    unBogus();
-  return doReplace(0, fLength, srcText, 0, srcText.fLength);
+  return doReplace(0, length(), srcText, 0, srcText.length());
  }
  
  inline UnicodeString&
@@ -3983,64 +4073,47 @@ UnicodeString::setTo(const UChar *srcChars,
               int32_t srcLength)
  {
    unBogus();
-  return doReplace(0, fLength, srcChars, 0, srcLength);
+  return doReplace(0, length(), srcChars, 0, srcLength);
  }
  
  inline UnicodeString&
  UnicodeString::setTo(UChar srcChar)
  {
    unBogus();
-  return doReplace(0, fLength, &srcChar, 0, 1);
+  return doReplace(0, length(), &srcChar, 0, 1);
  }
  
  inline UnicodeString&
  UnicodeString::setTo(UChar32 srcChar)
  {
    unBogus();
-  return replace(0, fLength, srcChar);
+  return replace(0, length(), srcChar);
  }
  
-inline UnicodeString&
-UnicodeString::operator+= (UChar ch)
-{ return doReplace(fLength, 0, &ch, 0, 1); }
-
-inline UnicodeString&
-UnicodeString::operator+= (UChar32 ch) {
-  UChar buffer[U16_MAX_LENGTH];
-  int32_t _length = 0;
-  UBool isError = FALSE;
-  U16_APPEND(buffer, _length, U16_MAX_LENGTH, ch, isError);
-  return doReplace(fLength, 0, buffer, 0, _length);
-}
-
-inline UnicodeString&
-UnicodeString::operator+= (const UnicodeString& srcText)
-{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
-
  inline UnicodeString&
  UnicodeString::append(const UnicodeString& srcText,
                int32_t srcStart,
                int32_t srcLength)
-{ return doReplace(fLength, 0, srcText, srcStart, srcLength); }
+{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
  
  inline UnicodeString&
  UnicodeString::append(const UnicodeString& srcText)
-{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
+{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
  
  inline UnicodeString&
  UnicodeString::append(const UChar *srcChars,
                int32_t srcStart,
                int32_t srcLength)
-{ return doReplace(fLength, 0, srcChars, srcStart, srcLength); }
+{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
  
  inline UnicodeString&
  UnicodeString::append(const UChar *srcChars,
                int32_t srcLength)
-{ return doReplace(fLength, 0, srcChars, 0, srcLength); }
+{ return doReplace(length(), 0, srcChars, 0, srcLength); }
  
  inline UnicodeString&
  UnicodeString::append(UChar srcChar)
-{ return doReplace(fLength, 0, &srcChar, 0, 1); }
+{ return doReplace(length(), 0, &srcChar, 0, 1); }
  
  inline UnicodeString&
  UnicodeString::append(UChar32 srcChar) {
@@ -4048,9 +4121,22 @@ UnicodeString::append(UChar32 srcChar) {
    int32_t _length = 0;
    UBool isError = FALSE;
    U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
-  return doReplace(fLength, 0, buffer, 0, _length);
+  return doReplace(length(), 0, buffer, 0, _length);
  }
  
+inline UnicodeString&
+UnicodeString::operator+= (UChar ch)
+{ return doReplace(length(), 0, &ch, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::operator+= (UChar32 ch) {
+  return append(ch);
+}
+
+inline UnicodeString&
+UnicodeString::operator+= (const UnicodeString& srcText)
+{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
+
  inline UnicodeString&
  UnicodeString::insert(int32_t start,
                const UnicodeString& srcText,
@@ -4061,7 +4147,7 @@ UnicodeString::insert(int32_t start,
  inline UnicodeString&
  UnicodeString::insert(int32_t start,
                const UnicodeString& srcText)
-{ return doReplace(start, 0, srcText, 0, srcText.fLength); }
+{ return doReplace(start, 0, srcText, 0, srcText.length()); }
  
  inline UnicodeString&
  UnicodeString::insert(int32_t start,
@@ -4094,7 +4180,7 @@ UnicodeString::remove()
    if(isBogus()) {
      unBogus();
    } else {
-    fLength = 0;
+    setLength(0);
    }
    return *this;
  }
@@ -4103,12 +4189,11 @@ inline UnicodeString&
  UnicodeString::remove(int32_t start,
               int32_t _length)
  {
-  if(start <= 0 && _length == INT32_MAX) {
-    // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
-    return remove();
-  } else {
+    if(start <= 0 && _length == INT32_MAX) {
+        // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
+        return remove();
+    }
      return doReplace(start, _length, NULL, 0, 0);
-  }
  }
  
  inline UnicodeString&
@@ -4123,8 +4208,8 @@ UnicodeString::truncate(int32_t targetLength)
      // truncate(0) of a bogus string makes the string empty and non-bogus
      unBogus();
      return FALSE;
-  } else if((uint32_t)targetLength < (uint32_t)fLength) {
-    fLength = targetLength;
+  } else if((uint32_t)targetLength < (uint32_t)length()) {
+    setLength(targetLength);
      return TRUE;
    } else {
      return FALSE;
@@ -4133,7 +4218,7 @@ UnicodeString::truncate(int32_t targetLength)
  
  inline UnicodeString&
  UnicodeString::reverse()
-{ return doReverse(0, fLength); }
+{ return doReverse(0, length()); }
  
  inline UnicodeString&
  UnicodeString::reverse(int32_t start,