icuSources/common/unicode/unistr.h

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 1998-2016, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *
   7 * File unistr.h
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *   09/25/98    stephen     Creation.
  13 *   11/11/98    stephen     Changed per 11/9 code review.
  14 *   04/20/99    stephen     Overhauled per 4/16 code review.
  15 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
  16 *                           handleReplaceBetween(); other methods unchanged.
  17 *   06/25/01    grhoten     Remove dependency on iostream.
  18 ******************************************************************************
  19 */
  20
  21 #ifndef UNISTR_H
  22 #define UNISTR_H
  23
  24 /**
  25  * \file
  26  * \brief C++ API: Unicode String
  27  */
  28
  29 #include "unicode/utypes.h"
  30 #include "unicode/rep.h"
  31 #include "unicode/std_string.h"
  32 #include "unicode/stringpiece.h"
  33 #include "unicode/bytestream.h"
  34 #include "unicode/ucasemap.h"
  35
  36 struct UConverter;          // unicode/ucnv.h
  37
  38 #ifndef U_COMPARE_CODE_POINT_ORDER
  39 /* Guarded by #ifndef so that an already existing definition is kept; see also ustring.h and unorm.h */
  40 /**
  41  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
  42  * Compare strings in code point order instead of code unit order.
  43  * @stable ICU 2.2
  44  */
  45 #define U_COMPARE_CODE_POINT_ORDER  0x8000
  46 #endif
  47
  48 #ifndef USTRING_H
     /* Declared here only while USTRING_H is undefined (presumably the include guard of unicode/ustring.h; confirm there). */
  49 /**
  50  * \ingroup ustring_ustrlen
  51  */
  52 U_STABLE int32_t U_EXPORT2
  53 u_strlen(const UChar *s);
  54 #endif
  55
  56 /**
  57  * \def U_STRING_CASE_MAPPER_DEFINED
  58  * @internal
  59  */
  60 #ifndef U_STRING_CASE_MAPPER_DEFINED
  61 #define U_STRING_CASE_MAPPER_DEFINED
  62
  63 /**
  64  * Internal string case mapping function type.
      * A function of this type receives a case map (csm), a destination buffer
      * with its capacity (dest, destCapacity), a source string with its length
      * (src, srcLength) and a pointer to an error code (pErrorCode).
      * It returns an int32_t (presumably the length of the mapped result;
      * confirm against the functions that implement this type).
  65  * @internal
  66  */
  67 typedef int32_t U_CALLCONV
  68 UStringCaseMapper(const UCaseMap *csm,
  69                   UChar *dest, int32_t destCapacity,
  70                   const UChar *src, int32_t srcLength,
  71                   UErrorCode *pErrorCode);
  72
  73 #endif
  74
  75 U_NAMESPACE_BEGIN
  76
  77 class BreakIterator;        // unicode/brkiter.h
  78 class Locale;               // unicode/locid.h
  79 class StringCharacterIterator;
  80 class UnicodeStringAppendable;  // unicode/appendable.h
  81
  82 /* The <iostream> include has been moved to unicode/ustream.h */
  83
  84 /**
      * \def US_INV
  85  * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
  86  * which constructs a Unicode string from an invariant-character char * string.
  87  * About invariant characters see utypes.h.
  88  * This constructor has no runtime dependency on conversion code and is
  89  * therefore recommended over ones taking a charset name string
  90  * (where the empty string "" indicates invariant-character conversion).
  91  *
      * Expands to the fully qualified enum constant icu::UnicodeString::kInvariant.
      *
  92  * @stable ICU 3.2
  93  */
  94 #define US_INV icu::UnicodeString::kInvariant
  95
  96 /**
      * \def UNICODE_STRING
  97  * Unicode String literals in C++.
  98  * Dependent on the platform properties, different UnicodeString
  99  * constructors should be used to create a UnicodeString object from
 100  * a string literal.
 101  * The macros are defined for maximum performance.
 102  * They work only for strings that contain "invariant characters", i.e.,
 103  * only latin letters, digits, and some punctuation.
 104  * See utypes.h for details.
 105  *
 106  * The string parameter must be a C string literal.
 107  * The length of the string, not including the terminating
 108  * <code>NUL</code>, must be specified as a constant.
 109  * The U_STRING_DECL macro should be invoked exactly once for one
 110  * such string variable before it is used.
      *
      * The expansion is selected as follows, in this order:
      * - U_DECLARE_UTF16 is defined: the literal is passed through U_DECLARE_UTF16().
      * - wchar_t has the same size as UChar, and either the charset family is ASCII
      *   or UChar is 2 bytes wide and U_WCHAR_IS_UTF16 is defined:
      *   the literal is turned into a wide literal by pasting an L prefix onto it.
      * - UChar is 1 byte wide and the charset family is ASCII: the char literal is used as is.
      * - Otherwise: the UnicodeString(cs, _length, US_INV) constructor is used.
      * In the first three cases the literal is cast to const UChar * and passed to
      * the UnicodeString(TRUE, text, _length) constructor.
 111  * @stable ICU 2.0
 112  */
 113 #if defined(U_DECLARE_UTF16)
 114 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
 115 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
 116 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
 117 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
 118 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
 119 #else
 120 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
 121 #endif
 122
 123 /**
      * \def UNICODE_STRING_SIMPLE
 124  * Unicode String literals in C++.
 125  * Dependent on the platform properties, different UnicodeString
 126  * constructors should be used to create a UnicodeString object from
 127  * a string literal.
 128  * The macros are defined for improved performance.
 129  * They work only for strings that contain "invariant characters", i.e.,
 130  * only latin letters, digits, and some punctuation.
 131  * See utypes.h for details.
 132  *
 133  * The string parameter must be a C string literal.
      * Expands to UNICODE_STRING(cs, -1), so the caller supplies no length constant.
 134  * @stable ICU 2.0
 135  */
 136 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
 137
 138 /**
 139  * \def UNISTR_FROM_CHAR_EXPLICIT
 140  * This can be defined to be empty or "explicit".
 141  * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
 142  * constructors are marked as explicit, preventing their inadvertent use.
 143  * @stable ICU 49
 144  */
 145 #ifndef UNISTR_FROM_CHAR_EXPLICIT
         // A definition supplied before this point takes precedence over the defaults below.
 146 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
 147     // Auto-"explicit" in ICU library code, i.e. when one of the *_IMPLEMENTATION macros tested above is defined.
 148 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
 149 # else
 150     // Empty by default for source code compatibility: the constructors are then not marked explicit.
 151 #   define UNISTR_FROM_CHAR_EXPLICIT
 152 # endif
 153 #endif
 154
 155 /**
 156  * \def UNISTR_FROM_STRING_EXPLICIT
 157  * This can be defined to be empty or "explicit".
 158  * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
 159  * constructors are marked as explicit, preventing their inadvertent use.
 160  *
 161  * In particular, this helps prevent accidentally depending on ICU conversion code
 162  * by passing a string literal into an API with a const UnicodeString & parameter.
 163  * @stable ICU 49
 164  */
 165 #ifndef UNISTR_FROM_STRING_EXPLICIT
         // A definition supplied before this point takes precedence over the defaults below.
 166 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
 167     // Auto-"explicit" in ICU library code, i.e. when one of the *_IMPLEMENTATION macros tested above is defined.
 168 #   define UNISTR_FROM_STRING_EXPLICIT explicit
 169 # else
 170     // Empty by default for source code compatibility: the constructors are then not marked explicit.
 171 #   define UNISTR_FROM_STRING_EXPLICIT
 172 # endif
 173 #endif
 174
 175 /* Cannot wrap the following in #ifndef U_HIDE_DRAFT_API
 176    because it is used to construct other non-internal constants. */
 177 /**
 178  * \def UNISTR_OBJECT_SIZE
 179  * Desired sizeof(UnicodeString) in bytes.
 180  * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
 181  * The object size may want to be a multiple of 16 bytes,
 182  * which is a common granularity for heap allocation.
 183  *
 184  * Any space inside the object beyond sizeof(vtable pointer) + 2
 185  * is available for storing short strings inside the object.
 186  * The bigger the object, the longer a string that can be stored inside the object,
 187  * without additional heap allocation.
 188  *
 189  * Depending on a platform's pointer size, pointer alignment requirements,
 190  * and struct padding, the compiler will usually round up sizeof(UnicodeString)
 191  * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
 192  * to hold the fields for heap-allocated strings.
 193  * Such a minimum size also ensures that the object is easily large enough
 194  * to hold at least 2 UChars, for one supplementary code point (U16_MAX_LENGTH).
 195  *
 196  * sizeof(UnicodeString) >= 48 should work for all known platforms.
 197  *
 198  * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
 199  * sizeof(UnicodeString) = 64 would leave space for
 200  * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
 201  * UChars stored inside the object.
 202  *
 203  * The minimum object size on a 64-bit machine would be
 204  * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
 205  * and the internal buffer would hold up to 11 UChars in that case.
 206  *
 207  * @see U16_MAX_LENGTH
 208  * @draft ICU 56
 209  */
 210 #ifndef UNISTR_OBJECT_SIZE
         // Default of 64 bytes, used only when the macro has not been defined before this point.
 211 # define UNISTR_OBJECT_SIZE 64
 212 #endif
 213
 214 /**
 215  * UnicodeString is a string class that stores Unicode characters directly and provides
 216  * functionality similar to that of the Java String and StringBuffer/StringBuilder classes.
 217  * It is a concrete implementation of the abstract class Replaceable (for transliteration).
 218  *
 219  * A UnicodeString may also "alias" an external array of characters
 220  * (that is, point to it, rather than own the array)
 221  * whose lifetime must then at least match the lifetime of the aliasing object.
 222  * This aliasing may be preserved when returning a UnicodeString by value,
 223  * depending on the compiler and the function implementation,
 224  * via Return Value Optimization (RVO) or the move assignment operator.
 225  * (However, the copy assignment operator does not preserve aliasing.)
 226  * For details see the description of storage models at the end of the class API docs
 227  * and in the User Guide chapter linked from there.
 228  *
 229  * The UnicodeString class is not suitable for subclassing.
 230  *
 231  * <p>For an overview of Unicode strings in C and C++ see the
 232  * <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p>
 233  *
 234  * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
 235  * A Unicode character may be stored with either one code unit
 236  * (the most common case) or with a matched pair of special code units
 237  * ("surrogates"). The data type for code units is UChar.
 238  * For single-character handling, a Unicode character code <em>point</em> is a value
 239  * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
 240  *
 241  * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
 242  * This is the same as with multi-byte char* strings in traditional string handling.
 243  * Operations on partial strings typically do not test for code point boundaries.
 244  * If necessary, the user needs to take care of such boundaries by testing for the code unit
 245  * values or by using functions like
 246  * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
 247  * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
 248  *
 249  * UnicodeString methods are more lenient with regard to input parameter values
 250  * than other ICU APIs. In particular:
 251  * - If indexes are out of bounds for a UnicodeString object
 252  *   (<0 or >length()) then they are "pinned" to the nearest boundary.
 253  * - If primitive string pointer values (e.g., const UChar * or char *)
 254  *   for input strings are NULL, then those input string parameters are treated
 255  *   as if they pointed to an empty string.
 256  *   However, this is <em>not</em> the case for char * parameters for charset names
 257  *   or other IDs.
 258  * - Most UnicodeString methods do not take a UErrorCode parameter because
 259  *   there are usually very few opportunities for failure other than a shortage
 260  *   of memory, error codes in low-level C++ string methods would be inconvenient,
 261  *   and the error code as the last parameter (ICU convention) would prevent
 262  *   the use of default parameter values.
 263  *   Instead, such methods set the UnicodeString into a "bogus" state
 264  *   (see isBogus()) if an error occurs.
 265  *
 266  * In string comparisons, two UnicodeString objects that are both "bogus"
 267  * compare equal (to be transitive and prevent endless loops in sorting),
 268  * and a "bogus" string compares less than any non-"bogus" one.
 269  *
 270  * Const UnicodeString methods are thread-safe. Multiple threads can use
 271  * const methods on the same UnicodeString object simultaneously,
 272  * but non-const methods must not be called concurrently (in multiple threads)
 273  * with any other (const or non-const) methods.
 274  *
 275  * Similarly, const UnicodeString & parameters are thread-safe.
 276  * One object may be passed in as such a parameter concurrently in multiple threads.
 277  * This includes the const UnicodeString & parameters for
 278  * copy construction, assignment, and cloning.
 279  *
 280  * <p>UnicodeString uses several storage methods.
 281  * String contents can be stored inside the UnicodeString object itself,
 282  * in an allocated and shared buffer, or in an outside buffer that is "aliased".
 283  * Most of this is done transparently, but careful aliasing in particular provides
 284  * significant performance improvements.
 285  * Also, the internal buffer is accessible via special functions.
 286  * For details see the
 287  * <a href="http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model">User Guide Strings chapter</a>.</p>
 288  *
 289  * @see utf.h
 290  * @see CharacterIterator
 291  * @stable ICU 2.0
 292  */
 293 class U_COMMON_API UnicodeString : public Replaceable
 294 {
 295 public:
 296
 297   /**
 298    * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
 299    * which constructs a Unicode string from an invariant-character char * string.
 300    * Use the macro US_INV instead of the full qualification for this value.
 301    *
 302    * @see US_INV
 303    * @stable ICU 3.2
 304    */
 305   enum EInvariant {
 306     /**
 307      * @see EInvariant
 308      * @stable ICU 3.2
 309      */
 310     kInvariant
 311   };
 312
 313   //========================================
 314   // Read-only operations
 315   //========================================
 316
 317   /* Comparison - bitwise only - for international comparison use collation */
 318
 319   /**
 320    * Equality operator. Performs only bitwise comparison.
 321    * @param text The UnicodeString to compare to this one.
 322    * @return TRUE if <TT>text</TT> contains the same characters as this one,
 323    * FALSE otherwise.
 324    * @stable ICU 2.0
 325    */
 326   inline UBool operator== (const UnicodeString& text) const;
 327
 328   /**
 329    * Inequality operator. Performs only bitwise comparison.
 330    * @param text The UnicodeString to compare to this one.
 331    * @return FALSE if <TT>text</TT> contains the same characters as this one,
 332    * TRUE otherwise.
 333    * @stable ICU 2.0
 334    */
 335   inline UBool operator!= (const UnicodeString& text) const;
 336
 337   /**
 338    * Greater than operator. Performs only bitwise comparison.
 339    * @param text The UnicodeString to compare to this one.
 340    * @return TRUE if the characters in this are bitwise
 341    * greater than the characters in <code>text</code>, FALSE otherwise
 342    * @stable ICU 2.0
 343    */
 344   inline UBool operator> (const UnicodeString& text) const;
 345
 346   /**
 347    * Less than operator. Performs only bitwise comparison.
 348    * @param text The UnicodeString to compare to this one.
 349    * @return TRUE if the characters in this are bitwise
 350    * less than the characters in <code>text</code>, FALSE otherwise
 351    * @stable ICU 2.0
 352    */
 353   inline UBool operator< (const UnicodeString& text) const;
 354
 355   /**
 356    * Greater than or equal operator. Performs only bitwise comparison.
 357    * @param text The UnicodeString to compare to this one.
 358    * @return TRUE if the characters in this are bitwise
 359    * greater than or equal to the characters in <code>text</code>, FALSE otherwise
 360    * @stable ICU 2.0
 361    */
 362   inline UBool operator>= (const UnicodeString& text) const;
 363
 364   /**
 365    * Less than or equal operator. Performs only bitwise comparison.
 366    * @param text The UnicodeString to compare to this one.
 367    * @return TRUE if the characters in this are bitwise
 368    * less than or equal to the characters in <code>text</code>, FALSE otherwise
 369    * @stable ICU 2.0
 370    */
 371   inline UBool operator<= (const UnicodeString& text) const;
 372
 373   /**
 374    * Compare the characters bitwise in this UnicodeString to
 375    * the characters in <code>text</code>.
 376    * @param text The UnicodeString to compare to this one.
 377    * @return The result of bitwise character comparison: 0 if this
 378    * contains the same characters as <code>text</code>, -1 if the characters in
 379    * this are bitwise less than the characters in <code>text</code>, +1 if the
 380    * characters in this are bitwise greater than the characters
 381    * in <code>text</code>.
 382    * @stable ICU 2.0
 383    */
 384   inline int8_t compare(const UnicodeString& text) const;
 385
 386   /**
 387    * Compare the characters bitwise in the range
 388    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
 389    * in the <b>entire string</b> <TT>text</TT>.
 390    * (The parameters "start" and "length" are not applied to the other text "text".)
 391    * @param start the offset at which the compare operation begins
 392    * @param length the number of characters of text to compare.
 393    * @param text the other text to be compared against this string.
 394    * @return The result of bitwise character comparison: 0 if this
 395    * contains the same characters as <code>text</code>, -1 if the characters in
 396    * this are bitwise less than the characters in <code>text</code>, +1 if the
 397    * characters in this are bitwise greater than the characters
 398    * in <code>text</code>.
 399    * @stable ICU 2.0
 400    */
 401   inline int8_t compare(int32_t start,
 402          int32_t length,
 403          const UnicodeString& text) const;
 404
 405   /**
 406    * Compare the characters bitwise in the range
 407    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
 408    * in <TT>srcText</TT> in the range
 409    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
 410    * @param start the offset at which the compare operation begins
 411    * @param length the number of characters in this to compare.
 412    * @param srcText the text to be compared
 413    * @param srcStart the offset into <TT>srcText</TT> to start comparison
 414    * @param srcLength the number of characters in <TT>srcText</TT> to compare
 415    * @return The result of bitwise character comparison: 0 if this
 416    * contains the same characters as <code>srcText</code>, -1 if the characters in
 417    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
 418    * characters in this are bitwise greater than the characters
 419    * in <code>srcText</code>.
 420    * @stable ICU 2.0
 421    */
 422    inline int8_t compare(int32_t start,
 423          int32_t length,
 424          const UnicodeString& srcText,
 425          int32_t srcStart,
 426          int32_t srcLength) const;
 427
 428   /**
 429    * Compare the characters bitwise in this UnicodeString with the first
 430    * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
 431    * @param srcChars The characters to compare to this UnicodeString.
 432    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
 433    * @return The result of bitwise character comparison: 0 if this
 434    * contains the same characters as <code>srcChars</code>, -1 if the characters in
 435    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
 436    * characters in this are bitwise greater than the characters
 437    * in <code>srcChars</code>.
 438    * @stable ICU 2.0
 439    */
 440   inline int8_t compare(const UChar *srcChars,
 441          int32_t srcLength) const;
 442
 443   /**
 444    * Compare the characters bitwise in the range
 445    * [<TT>start</TT>, <TT>start + length</TT>) with the first
 446    * <TT>length</TT> characters in <TT>srcChars</TT>
 447    * @param start the offset at which the compare operation begins
 448    * @param length the number of characters to compare.
 449    * @param srcChars the characters to be compared
 450    * @return The result of bitwise character comparison: 0 if this
 451    * contains the same characters as <code>srcChars</code>, -1 if the characters in
 452    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
 453    * characters in this are bitwise greater than the characters
 454    * in <code>srcChars</code>.
 455    * @stable ICU 2.0
 456    */
 457   inline int8_t compare(int32_t start,
 458          int32_t length,
 459          const UChar *srcChars) const;
 460
 461   /**
 462    * Compare the characters bitwise in the range
 463    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
 464    * in <TT>srcChars</TT> in the range
 465    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
 466    * @param start the offset at which the compare operation begins
 467    * @param length the number of characters in this to compare
 468    * @param srcChars the characters to be compared
 469    * @param srcStart the offset into <TT>srcChars</TT> to start comparison
 470    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
 471    * @return The result of bitwise character comparison: 0 if this
 472    * contains the same characters as <code>srcChars</code>, -1 if the characters in
 473    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
 474    * characters in this are bitwise greater than the characters
 475    * in <code>srcChars</code>.
 476    * @stable ICU 2.0
 477    */
 478   inline int8_t compare(int32_t start,
 479          int32_t length,
 480          const UChar *srcChars,
 481          int32_t srcStart,
 482          int32_t srcLength) const;
 483
 484   /**
 485    * Compare the characters bitwise in the range
 486    * [<TT>start</TT>, <TT>limit</TT>) with the characters
 487    * in <TT>srcText</TT> in the range
 488    * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
 489    * @param start the offset at which the compare operation begins
 490    * @param limit the offset immediately following the compare operation
 491    * @param srcText the text to be compared
 492    * @param srcStart the offset into <TT>srcText</TT> to start comparison
 493    * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
 494    * @return The result of bitwise character comparison: 0 if this
 495    * contains the same characters as <code>srcText</code>, -1 if the characters in
 496    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
 497    * characters in this are bitwise greater than the characters
 498    * in <code>srcText</code>.
 499    * @stable ICU 2.0
 500    */
 501   inline int8_t compareBetween(int32_t start,
 502             int32_t limit,
 503             const UnicodeString& srcText,
 504             int32_t srcStart,
 505             int32_t srcLimit) const;
 506
 507   /**
 508    * Compare two Unicode strings in code point order.
 509    * The result may be different from the results of compare(), operator<, etc.
 510    * if supplementary characters are present:
 511    *
 512    * In UTF-16, supplementary characters (with code points U+10000 and above) are
 513    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
 514    * which means that they compare as less than some other BMP characters like U+feff.
 515    * This function compares Unicode strings in code point order.
 516    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
 517    *
 518    * @param text Another string to compare this one to.
 519    * @return a negative/zero/positive integer corresponding to whether
 520    * this string is less than/equal to/greater than the second one
 521    * in code point order
 522    * @stable ICU 2.0
 523    */
 524   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
 525
 526   /**
 527    * Compare two Unicode strings in code point order.
 528    * The result may be different from the results of compare(), operator<, etc.
 529    * if supplementary characters are present:
 530    *
 531    * In UTF-16, supplementary characters (with code points U+10000 and above) are
 532    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
 533    * which means that they compare as less than some other BMP characters like U+feff.
 534    * This function compares Unicode strings in code point order.
 535    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
 536    *
 537    * @param start The start offset in this string at which the compare operation begins.
 538    * @param length The number of code units from this string to compare.
 539    * @param srcText Another string to compare this one to.
 540    * @return a negative/zero/positive integer corresponding to whether
 541    * this string is less than/equal to/greater than the second one
 542    * in code point order
 543    * @stable ICU 2.0
 544    */
 545   inline int8_t compareCodePointOrder(int32_t start,
 546                                       int32_t length,
 547                                       const UnicodeString& srcText) const;
 548
 549   /**
 550    * Compare two Unicode strings in code point order.
 551    * The result may be different from the results of compare(), operator<, etc.
 552    * if supplementary characters are present:
 553    *
 554    * In UTF-16, supplementary characters (with code points U+10000 and above) are
 555    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
 556    * which means that they compare as less than some other BMP characters like U+feff.
 557    * This function compares Unicode strings in code point order.
 558    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
 559    *
 560    * @param start The start offset in this string at which the compare operation begins.
 561    * @param length The number of code units from this string to compare.
 562    * @param srcText Another string to compare this one to.
 563    * @param srcStart The start offset in that string at which the compare operation begins.
 564    * @param srcLength The number of code units from that string to compare.
 565    * @return a negative/zero/positive integer corresponding to whether
 566    * this string is less than/equal to/greater than the second one
 567    * in code point order
 568    * @stable ICU 2.0
 569    */
 570    inline int8_t compareCodePointOrder(int32_t start,
 571                                        int32_t length,
 572                                        const UnicodeString& srcText,
 573                                        int32_t srcStart,
 574                                        int32_t srcLength) const;
 575
 576   /**
 577    * Compare two Unicode strings in code point order.
 578    * The result may be different from the results of compare(), operator<, etc.
 579    * if supplementary characters are present:
 580    *
 581    * In UTF-16, supplementary characters (with code points U+10000 and above) are
 582    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
 583    * which means that they compare as less than some other BMP characters like U+feff.
 584    * This function compares Unicode strings in code point order.
 585    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
 586    *
 587    * @param srcChars A pointer to another string to compare this one to.
 588    * @param srcLength The number of code units from that string to compare.
 589    * @return a negative/zero/positive integer corresponding to whether
 590    * this string is less than/equal to/greater than the second one
 591    * in code point order
 592    * @stable ICU 2.0
 593    */
 594   inline int8_t compareCodePointOrder(const UChar *srcChars,
 595                                       int32_t srcLength) const;
 596
 597   /**
 598    * Compare two Unicode strings in code point order.
 599    * The result may be different from the results of compare(), operator<, etc.
 600    * if supplementary characters are present:
 601    *
 602    * In UTF-16, supplementary characters (with code points U+10000 and above) are
 603    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
 604    * which means that they compare as less than some other BMP characters like U+feff.
 605    * This function compares Unicode strings in code point order.
 606    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
 607    *
 608    * @param start The start offset in this string at which the compare operation begins.
 609    * @param length The number of code units from this string to compare.
 610    * @param srcChars A pointer to another string to compare this one to.
 611    * @return a negative/zero/positive integer corresponding to whether
 612    * this string is less than/equal to/greater than the second one
 613    * in code point order
 614    * @stable ICU 2.0
 615    */
 616   inline int8_t compareCodePointOrder(int32_t start,
 617                                       int32_t length,
 618                                       const UChar *srcChars) const;
 619
 620   /**
 621    * Compare two Unicode strings in code point order.
 622    * The result may be different from the results of compare(), operator<, etc.
 623    * if supplementary characters are present:
 624    *
 625    * In UTF-16, supplementary characters (with code points U+10000 and above) are
 626    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
 627    * which means that they compare as less than some other BMP characters like U+feff.
 628    * This function compares Unicode strings in code point order.
 629    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
 630    *
 631    * @param start The start offset in this string at which the compare operation begins.
 632    * @param length The number of code units from this string to compare.
 633    * @param srcChars A pointer to another string to compare this one to.
 634    * @param srcStart The start offset in that string at which the compare operation begins.
 635    * @param srcLength The number of code units from that string to compare.
 636    * @return a negative/zero/positive integer corresponding to whether
 637    * this string is less than/equal to/greater than the second one
 638    * in code point order
 639    * @stable ICU 2.0
 640    */
 641   inline int8_t compareCodePointOrder(int32_t start,
 642                                       int32_t length,
 643                                       const UChar *srcChars,
 644                                       int32_t srcStart,
 645                                       int32_t srcLength) const;
 646
 647   /**
 648    * Compare the range [start, limit) of this string with the range [srcStart, srcLimit) of srcText in code point order.
 649    * The result may be different from the results of compare(), operator<, etc.
 650    * if supplementary characters are present:
 651    *
 652    * In UTF-16, supplementary characters (with code points U+10000 and above) are
 653    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
 654    * which means that they compare as less than some other BMP characters like U+feff.
 655    * This function compares Unicode strings in code point order.
 656    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
 657    *
 658    * @param start The start offset in this string at which the compare operation begins.
 659    * @param limit The offset after the last code unit from this string to compare.
 660    * @param srcText Another string to compare this one to.
 661    * @param srcStart The start offset in that string at which the compare operation begins.
 662    * @param srcLimit The offset after the last code unit from that string to compare.
 663    * @return a negative/zero/positive integer corresponding to whether
 664    * this string is less than/equal to/greater than the second one
 665    * in code point order.
 666    * @stable ICU 2.0
 667    */
 668   inline int8_t compareCodePointOrderBetween(int32_t start,
 669                                              int32_t limit,
 670                                              const UnicodeString& srcText,
 671                                              int32_t srcStart,
 672                                              int32_t srcLimit) const;
 673
 674   /**
 675    * Compare this string with <TT>text</TT> case-insensitively using full case folding.
 676    * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
 677    *
 678    * @param text Another string to compare this one to.
 679    * @param options A bit set of options:
 680    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
 681    *     Comparison in code unit order with default case folding.
 682    *
 683    *   - U_COMPARE_CODE_POINT_ORDER
 684    *     Set to choose code point order instead of code unit order
 685    *     (see u_strCompare for details).
 686    *
 687    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
 688    *
 689    * @return A negative, zero, or positive integer indicating the comparison result.
 690    * @stable ICU 2.0
 691    */
 692   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
 693
 694   /**
 695    * Compare a range of this string with <TT>srcText</TT> case-insensitively using full case folding.
 696    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
 697    *
 698    * @param start The start offset in this string at which the compare operation begins.
 699    * @param length The number of code units from this string to compare.
 700    * @param srcText Another string to compare this one to.
 701    * @param options A bit set of options:
 702    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
 703    *     Comparison in code unit order with default case folding.
 704    *
 705    *   - U_COMPARE_CODE_POINT_ORDER
 706    *     Set to choose code point order instead of code unit order
 707    *     (see u_strCompare for details).
 708    *
 709    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
 710    *
 711    * @return A negative, zero, or positive integer indicating the comparison result.
 712    * @stable ICU 2.0
 713    */
 714   inline int8_t caseCompare(int32_t start,
 715          int32_t length,
 716          const UnicodeString& srcText,
 717          uint32_t options) const;
 718
 719   /**
 720    * Compare a range of this string with a range of <TT>srcText</TT> case-insensitively using full case folding.
 721    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
 722    *
 723    * @param start The start offset in this string at which the compare operation begins.
 724    * @param length The number of code units from this string to compare.
 725    * @param srcText Another string to compare this one to.
 726    * @param srcStart The start offset in that string at which the compare operation begins.
 727    * @param srcLength The number of code units from that string to compare.
 728    * @param options A bit set of options:
 729    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
 730    *     Comparison in code unit order with default case folding.
 731    *
 732    *   - U_COMPARE_CODE_POINT_ORDER
 733    *     Set to choose code point order instead of code unit order
 734    *     (see u_strCompare for details).
 735    *
 736    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
 737    *
 738    * @return A negative, zero, or positive integer indicating the comparison result.
 739    * @stable ICU 2.0
 740    */
 741   inline int8_t caseCompare(int32_t start,
 742          int32_t length,
 743          const UnicodeString& srcText,
 744          int32_t srcStart,
 745          int32_t srcLength,
 746          uint32_t options) const;
 747
 748   /**
 749    * Compare this string with the text at <TT>srcChars</TT> case-insensitively using full case folding.
 750    * This is equivalent to comparing the case-folded forms (see foldCase(options)) of this string and of the text at srcChars.
 751    *
 752    * @param srcChars A pointer to another string to compare this one to.
 753    * @param srcLength The number of code units from that string to compare.
 754    * @param options A bit set of options:
 755    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
 756    *     Comparison in code unit order with default case folding.
 757    *
 758    *   - U_COMPARE_CODE_POINT_ORDER
 759    *     Set to choose code point order instead of code unit order
 760    *     (see u_strCompare for details).
 761    *
 762    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
 763    *
 764    * @return A negative, zero, or positive integer indicating the comparison result.
 765    * @stable ICU 2.0
 766    */
 767   inline int8_t caseCompare(const UChar *srcChars,
 768          int32_t srcLength,
 769          uint32_t options) const;
 770
 771   /**
 772    * Compare a range of this string with the text at <TT>srcChars</TT> case-insensitively using full case folding.
 773    * This is equivalent to comparing the case-folded forms (see foldCase(options)) of this string and of the text at srcChars.
 774    *
 775    * @param start The start offset in this string at which the compare operation begins.
 776    * @param length The number of code units from this string to compare.
 777    * @param srcChars A pointer to another string to compare this one to.
 778    * @param options A bit set of options:
 779    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
 780    *     Comparison in code unit order with default case folding.
 781    *
 782    *   - U_COMPARE_CODE_POINT_ORDER
 783    *     Set to choose code point order instead of code unit order
 784    *     (see u_strCompare for details).
 785    *
 786    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
 787    *
 788    * @return A negative, zero, or positive integer indicating the comparison result.
 789    * @stable ICU 2.0
 790    */
 791   inline int8_t caseCompare(int32_t start,
 792          int32_t length,
 793          const UChar *srcChars,
 794          uint32_t options) const;
 795
 796   /**
 797    * Compare a range of this string with a range of the text at <TT>srcChars</TT> case-insensitively using full case folding.
 798    * This is equivalent to comparing the case-folded forms (see foldCase(options)) of this string and of the text at srcChars.
 799    *
 800    * @param start The start offset in this string at which the compare operation begins.
 801    * @param length The number of code units from this string to compare.
 802    * @param srcChars A pointer to another string to compare this one to.
 803    * @param srcStart The start offset in that string at which the compare operation begins.
 804    * @param srcLength The number of code units from that string to compare.
 805    * @param options A bit set of options:
 806    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
 807    *     Comparison in code unit order with default case folding.
 808    *
 809    *   - U_COMPARE_CODE_POINT_ORDER
 810    *     Set to choose code point order instead of code unit order
 811    *     (see u_strCompare for details).
 812    *
 813    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
 814    *
 815    * @return A negative, zero, or positive integer indicating the comparison result.
 816    * @stable ICU 2.0
 817    */
 818   inline int8_t caseCompare(int32_t start,
 819          int32_t length,
 820          const UChar *srcChars,
 821          int32_t srcStart,
 822          int32_t srcLength,
 823          uint32_t options) const;
 824
 825   /**
 826    * Compare the range [start, limit) of this string with the range [srcStart, srcLimit) of srcText case-insensitively using full case folding.
 827    * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
 828    *
 829    * @param start The start offset in this string at which the compare operation begins.
 830    * @param limit The offset after the last code unit from this string to compare.
 831    * @param srcText Another string to compare this one to.
 832    * @param srcStart The start offset in that string at which the compare operation begins.
 833    * @param srcLimit The offset after the last code unit from that string to compare.
 834    * @param options A bit set of options:
 835    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
 836    *     Comparison in code unit order with default case folding.
 837    *
 838    *   - U_COMPARE_CODE_POINT_ORDER
 839    *     Set to choose code point order instead of code unit order
 840    *     (see u_strCompare for details).
 841    *
 842    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
 843    *
 844    * @return A negative, zero, or positive integer indicating the comparison result.
 845    * @stable ICU 2.0
 846    */
 847   inline int8_t caseCompareBetween(int32_t start,
 848             int32_t limit,
 849             const UnicodeString& srcText,
 850             int32_t srcStart,
 851             int32_t srcLimit,
 852             uint32_t options) const;
 853
 854   /**
 855    * Determine if this starts with the characters in <TT>text</TT>.
 856    * @param text The text to match.
 857    * @return TRUE if this starts with the characters in <TT>text</TT>,
 858    * FALSE otherwise.
 859    * @stable ICU 2.0
 860    */
 861   inline UBool startsWith(const UnicodeString& text) const;
 862
 863   /**
 864    * Determine if this starts with the characters in <TT>srcText</TT>
 865    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
 866    * @param srcText The text to match.
 867    * @param srcStart the offset into <TT>srcText</TT> to start matching
 868    * @param srcLength the number of code units in <TT>srcText</TT> to match
 869    * @return TRUE if this starts with the characters in <TT>srcText</TT>,
 870    * FALSE otherwise
 871    * @stable ICU 2.0
 872    */
 873   inline UBool startsWith(const UnicodeString& srcText,
 874             int32_t srcStart,
 875             int32_t srcLength) const;
 876
 877   /**
 878    * Determine if this starts with the characters in <TT>srcChars</TT>.
 879    * @param srcChars The characters to match.
 880    * @param srcLength the number of code units in <TT>srcChars</TT>
 881    * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
 882    * FALSE otherwise
 883    * @stable ICU 2.0
 884    */
 885   inline UBool startsWith(const UChar *srcChars,
 886             int32_t srcLength) const;
 887
 888   /**
 889    * Determine if this starts with the characters in <TT>srcChars</TT>
 890    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
 891    * @param srcChars The characters to match.
 892    * @param srcStart the offset into <TT>srcChars</TT> to start matching
 893    * @param srcLength the number of code units in <TT>srcChars</TT> to match
 894    * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
 895    * @stable ICU 2.0
 896    */
 897   inline UBool startsWith(const UChar *srcChars,
 898             int32_t srcStart,
 899             int32_t srcLength) const;
 900
 901   /**
 902    * Determine if this ends with the characters in <TT>text</TT>.
 903    * @param text The text to match.
 904    * @return TRUE if this ends with the characters in <TT>text</TT>,
 905    * FALSE otherwise.
 906    * @stable ICU 2.0
 907    */
 908   inline UBool endsWith(const UnicodeString& text) const;
 909
 910   /**
 911    * Determine if this ends with the characters in <TT>srcText</TT>
 912    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
 913    * @param srcText The text to match.
 914    * @param srcStart the offset into <TT>srcText</TT> to start matching
 915    * @param srcLength the number of code units in <TT>srcText</TT> to match
 916    * @return TRUE if this ends with the characters in <TT>srcText</TT>,
 917    * FALSE otherwise
 918    * @stable ICU 2.0
 919    */
 920   inline UBool endsWith(const UnicodeString& srcText,
 921           int32_t srcStart,
 922           int32_t srcLength) const;
 923
 924   /**
 925    * Determine if this ends with the characters in <TT>srcChars</TT>.
 926    * @param srcChars The characters to match.
 927    * @param srcLength the number of code units in <TT>srcChars</TT>
 928    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
 929    * FALSE otherwise
 930    * @stable ICU 2.0
 931    */
 932   inline UBool endsWith(const UChar *srcChars,
 933           int32_t srcLength) const;
 934
 935   /**
 936    * Determine if this ends with the characters in <TT>srcChars</TT>
 937    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
 938    * @param srcChars The characters to match.
 939    * @param srcStart the offset into <TT>srcChars</TT> to start matching
 940    * @param srcLength the number of code units in <TT>srcChars</TT> to match
 941    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
 942    * FALSE otherwise
 943    * @stable ICU 2.0
 944    */
 945   inline UBool endsWith(const UChar *srcChars,
 946           int32_t srcStart,
 947           int32_t srcLength) const;
 948
 949
 950   /* Searching - bitwise only */
 951
 952   /**
 953    * Locate in this the first occurrence of the characters in <TT>text</TT>,
 954    * using bitwise comparison.
 955    * @param text The text to search for.
 956    * @return The offset into this of the start of the first occurrence of <TT>text</TT>,
 957    * or -1 if not found.
 958    * @stable ICU 2.0
 959    */
 960   inline int32_t indexOf(const UnicodeString& text) const;
 961
 962   /**
 963    * Locate in this the first occurrence of the characters in <TT>text</TT>
 964    * starting at offset <TT>start</TT>, using bitwise comparison.
 965    * @param text The text to search for.
 966    * @param start The offset into this at which searching will start.
 967    * @return The offset into this of the start of the first occurrence of <TT>text</TT>,
 968    * or -1 if not found.
 969    * @stable ICU 2.0
 970    */
 971   inline int32_t indexOf(const UnicodeString& text,
 972               int32_t start) const;
 973
 974   /**
 975    * Locate in this the first occurrence in the range
 976    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
 977    * in <TT>text</TT>, using bitwise comparison.
 978    * @param text The text to search for.
 979    * @param start The offset into this at which searching will start.
 980    * @param length The number of code units in this to search.
 981    * @return The offset into this of the start of the first occurrence of <TT>text</TT>,
 982    * or -1 if not found.
 983    * @stable ICU 2.0
 984    */
 985   inline int32_t indexOf(const UnicodeString& text,
 986               int32_t start,
 987               int32_t length) const;
 988
 989   /**
 990    * Locate in this the first occurrence in the range
 991    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
 992    *  in <TT>srcText</TT> in the range
 993    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
 994    * using bitwise comparison.
 995    * @param srcText The text to search for.
 996    * @param srcStart the offset into <TT>srcText</TT> at which
 997    * to start matching
 998    * @param srcLength the number of code units in <TT>srcText</TT> to match
 999    * @param start the offset into this at which to start matching
1000    * @param length the number of code units in this to search
1001    * @return The offset into this of the start of <TT>srcText</TT>,
1002    * or -1 if not found.
1003    * @stable ICU 2.0
1004    */
1005   inline int32_t indexOf(const UnicodeString& srcText,
1006               int32_t srcStart,
1007               int32_t srcLength,
1008               int32_t start,
1009               int32_t length) const;
1010
1011   /**
1012    * Locate in this the first occurrence of the characters in
1013    * <TT>srcChars</TT>
1014    * starting at offset <TT>start</TT>, using bitwise comparison.
1015    * @param srcChars The text to search for.
1016    * @param srcLength the number of code units in <TT>srcChars</TT> to match
1017    * @param start the offset into this at which to start matching
1018    * @return The offset into this of the start of <TT>srcChars</TT>,
1019    * or -1 if not found.
1020    * @stable ICU 2.0
1021    */
1022   inline int32_t indexOf(const UChar *srcChars,
1023               int32_t srcLength,
1024               int32_t start) const;
1025
1026   /**
1027    * Locate in this the first occurrence in the range
1028    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1029    * in <TT>srcChars</TT>, using bitwise comparison.
1030    * @param srcChars The text to search for.
1031    * @param srcLength the number of code units in <TT>srcChars</TT>
1032    * @param start The offset into this at which searching will start.
1033    * @param length The number of code units in this to search.
1034    * @return The offset into this of the start of <TT>srcChars</TT>,
1035    * or -1 if not found.
1036    * @stable ICU 2.0
1037    */
1038   inline int32_t indexOf(const UChar *srcChars,
1039               int32_t srcLength,
1040               int32_t start,
1041               int32_t length) const;
1042
1043   /**
1044    * Locate in this the first occurrence in the range
1045    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1046    * in <TT>srcChars</TT> in the range
1047    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1048    * using bitwise comparison.
1049    * @param srcChars The text to search for.
1050    * @param srcStart the offset into <TT>srcChars</TT> at which
1051    * to start matching
1052    * @param srcLength the number of code units in <TT>srcChars</TT> to match
1053    * @param start the offset into this at which to start matching
1054    * @param length the number of code units in this to search
1055    * @return The offset into this of the start of <TT>srcChars</TT>,
1056    * or -1 if not found.
1057    * @stable ICU 2.0
1058    */
1059   int32_t indexOf(const UChar *srcChars,
1060               int32_t srcStart,
1061               int32_t srcLength,
1062               int32_t start,
1063               int32_t length) const;
1064
1065   /**
1066    * Locate in this the first occurrence of the BMP code point <code>c</code>,
1067    * using bitwise comparison.
1068    * @param c The code unit to search for.
1069    * @return The offset into this of the first occurrence of <TT>c</TT>, or -1 if not found.
1070    * @stable ICU 2.0
1071    */
1072   inline int32_t indexOf(UChar c) const;
1073
1074   /**
1075    * Locate in this the first occurrence of the code point <TT>c</TT>,
1076    * using bitwise comparison.
1077    *
1078    * @param c The code point to search for.
1079    * @return The offset into this of the first occurrence of <TT>c</TT>, or -1 if not found.
1080    * @stable ICU 2.0
1081    */
1082   inline int32_t indexOf(UChar32 c) const;
1083
1084   /**
1085    * Locate in this the first occurrence of the BMP code point <code>c</code>,
1086    * starting at offset <TT>start</TT>, using bitwise comparison.
1087    * @param c The code unit to search for.
1088    * @param start The offset into this at which searching will start.
1089    * @return The offset into this of the first occurrence of <TT>c</TT>, or -1 if not found.
1090    * @stable ICU 2.0
1091    */
1092   inline int32_t indexOf(UChar c,
1093               int32_t start) const;
1094
1095   /**
1096    * Locate in this the first occurrence of the code point <TT>c</TT>
1097    * starting at offset <TT>start</TT>, using bitwise comparison.
1098    *
1099    * @param c The code point to search for.
1100    * @param start The offset into this at which searching will start.
1101    * @return The offset into this of the first occurrence of <TT>c</TT>, or -1 if not found.
1102    * @stable ICU 2.0
1103    */
1104   inline int32_t indexOf(UChar32 c,
1105               int32_t start) const;
1106
1107   /**
1108    * Locate in this the first occurrence of the BMP code point <code>c</code>
1109    * in the range [<TT>start</TT>, <TT>start + length</TT>),
1110    * using bitwise comparison.
1111    * @param c The code unit to search for.
1112    * @param start the offset into this at which to start matching
1113    * @param length the number of code units in this to search
1114    * @return The offset into this of the first occurrence of <TT>c</TT>, or -1 if not found.
1115    * @stable ICU 2.0
1116    */
1117   inline int32_t indexOf(UChar c,
1118               int32_t start,
1119               int32_t length) const;
1120
1121   /**
1122    * Locate in this the first occurrence of the code point <TT>c</TT>
1123    * in the range [<TT>start</TT>, <TT>start + length</TT>),
1124    * using bitwise comparison.
1125    *
1126    * @param c The code point to search for.
1127    * @param start the offset into this at which to start matching
1128    * @param length the number of code units in this to search
1129    * @return The offset into this of the first occurrence of <TT>c</TT>, or -1 if not found.
1130    * @stable ICU 2.0
1131    */
1132   inline int32_t indexOf(UChar32 c,
1133               int32_t start,
1134               int32_t length) const;
1135
1136   /**
1137    * Locate in this the last occurrence of the characters in <TT>text</TT>,
1138    * using bitwise comparison.
1139    * @param text The text to search for.
1140    * @return The offset into this of the start of the last occurrence of <TT>text</TT>,
1141    * or -1 if not found.
1142    * @stable ICU 2.0
1143    */
1144   inline int32_t lastIndexOf(const UnicodeString& text) const;
1145
1146   /**
1147    * Locate in this the last occurrence of the characters in <TT>text</TT>
1148    * starting at offset <TT>start</TT>, using bitwise comparison.
1149    * @param text The text to search for.
1150    * @param start The offset into this at which searching will start.
1151    * @return The offset into this of the start of the last occurrence of <TT>text</TT>,
1152    * or -1 if not found.
1153    * @stable ICU 2.0
1154    */
1155   inline int32_t lastIndexOf(const UnicodeString& text,
1156               int32_t start) const;
1157
1158   /**
1159    * Locate in this the last occurrence in the range
1160    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1161    * in <TT>text</TT>, using bitwise comparison.
1162    * @param text The text to search for.
1163    * @param start The offset into this at which searching will start.
1164    * @param length The number of code units in this to search.
1165    * @return The offset into this of the start of the last occurrence of <TT>text</TT>,
1166    * or -1 if not found.
1167    * @stable ICU 2.0
1168    */
1169   inline int32_t lastIndexOf(const UnicodeString& text,
1170               int32_t start,
1171               int32_t length) const;
1172
1173   /**
1174    * Locate in this the last occurrence in the range
1175    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1176    * in <TT>srcText</TT> in the range
1177    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1178    * using bitwise comparison.
1179    * @param srcText The text to search for.
1180    * @param srcStart the offset into <TT>srcText</TT> at which
1181    * to start matching
1182    * @param srcLength the number of code units in <TT>srcText</TT> to match
1183    * @param start the offset into this at which to start matching
1184    * @param length the number of code units in this to search
1185    * @return The offset into this of the start of <TT>srcText</TT>,
1186    * or -1 if not found.
1187    * @stable ICU 2.0
1188    */
1189   inline int32_t lastIndexOf(const UnicodeString& srcText,
1190               int32_t srcStart,
1191               int32_t srcLength,
1192               int32_t start,
1193               int32_t length) const;
1194
1195   /**
1196    * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1197    * starting at offset <TT>start</TT>, using bitwise comparison.
1198    * @param srcChars The text to search for.
1199    * @param srcLength the number of code units in <TT>srcChars</TT> to match
1200    * @param start the offset into this at which to start matching
1201    * @return The offset into this of the start of <TT>srcChars</TT>,
1202    * or -1 if not found.
1203    * @stable ICU 2.0
1204    */
1205   inline int32_t lastIndexOf(const UChar *srcChars,
1206               int32_t srcLength,
1207               int32_t start) const;
1208
1209   /**
1210    * Locate in this the last occurrence in the range
1211    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1212    * in <TT>srcChars</TT>, using bitwise comparison.
1213    * @param srcChars The text to search for.
1214    * @param srcLength the number of code units in <TT>srcChars</TT>
1215    * @param start The offset into this at which searching will start.
1216    * @param length The number of code units in this to search.
1217    * @return The offset into this of the start of <TT>srcChars</TT>,
1218    * or -1 if not found.
1219    * @stable ICU 2.0
1220    */
1221   inline int32_t lastIndexOf(const UChar *srcChars,
1222               int32_t srcLength,
1223               int32_t start,
1224               int32_t length) const;
1225
1226   /**
1227    * Locate in this the last occurrence in the range
1228    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1229    * in <TT>srcChars</TT> in the range
1230    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1231    * using bitwise comparison.
1232    * @param srcChars The text to search for.
1233    * @param srcStart the offset into <TT>srcChars</TT> at which
1234    * to start matching
1235    * @param srcLength the number of code units in <TT>srcChars</TT> to match
1236    * @param start the offset into this at which to start matching
1237    * @param length the number of code units in this to search
1238    * @return The offset into this of the start of <TT>srcChars</TT>,
1239    * or -1 if not found.
1240    * @stable ICU 2.0
1241    */
1242   int32_t lastIndexOf(const UChar *srcChars,
1243               int32_t srcStart,
1244               int32_t srcLength,
1245               int32_t start,
1246               int32_t length) const;
1247
1248   /**
1249    * Locate in this the last occurrence of the BMP code point <code>c</code>,
1250    * using bitwise comparison.
1251    * @param c The code unit to search for.
1252    * @return The offset into this of the last occurrence of <TT>c</TT>, or -1 if not found.
1253    * @stable ICU 2.0
1254    */
1255   inline int32_t lastIndexOf(UChar c) const;
1256
1257   /**
1258    * Locate in this the last occurrence of the code point <TT>c</TT>,
1259    * using bitwise comparison.
1260    *
1261    * @param c The code point to search for.
1262    * @return The offset into this of the last occurrence of <TT>c</TT>, or -1 if not found.
1263    * @stable ICU 2.0
1264    */
1265   inline int32_t lastIndexOf(UChar32 c) const;
1266
1267   /**
1268    * Locate in this the last occurrence of the BMP code point <code>c</code>
1269    * starting at offset <TT>start</TT>, using bitwise comparison.
1270    * @param c The code unit to search for.
1271    * @param start The offset into this at which searching will start.
1272    * @return The offset into this of the last occurrence of <TT>c</TT>, or -1 if not found.
1273    * @stable ICU 2.0
1274    */
1275   inline int32_t lastIndexOf(UChar c,
1276               int32_t start) const;
1277
1278   /**
1279    * Locate in this the last occurrence of the code point <TT>c</TT>
1280    * starting at offset <TT>start</TT>, using bitwise comparison.
1281    *
1282    * @param c The code point to search for.
1283    * @param start The offset into this at which searching will start.
1284    * @return The offset into this of the last occurrence of <TT>c</TT>, or -1 if not found.
1285    * @stable ICU 2.0
1286    */
1287   inline int32_t lastIndexOf(UChar32 c,
1288               int32_t start) const;
1289
1290   /**
1291    * Locate in this the last occurrence of the BMP code point <code>c</code>
1292    * in the range [<TT>start</TT>, <TT>start + length</TT>),
1293    * using bitwise comparison.
1294    * @param c The code unit to search for.
1295    * @param start the offset into this at which to start matching
1296    * @param length the number of code units in this to search
1297    * @return The offset into this of the last occurrence of <TT>c</TT>, or -1 if not found.
1298    * @stable ICU 2.0
1299    */
1300   inline int32_t lastIndexOf(UChar c,
1301               int32_t start,
1302               int32_t length) const;
1303
1304   /**
1305    * Locate in this the last occurrence of the code point <TT>c</TT>
1306    * in the range [<TT>start</TT>, <TT>start + length</TT>),
1307    * using bitwise comparison.
1308    *
1309    * @param c The code point to search for.
1310    * @param start the offset into this at which to start matching
1311    * @param length the number of code units in this to search
1312    * @return The offset into this of the last occurrence of <TT>c</TT>, or -1 if not found.
1313    * @stable ICU 2.0
1314    */
1315   inline int32_t lastIndexOf(UChar32 c,
1316               int32_t start,
1317               int32_t length) const;
1318
1319
1320   /* Character access */
1321
1322   /**
1323    * Return the code unit at offset <tt>offset</tt>.
1324    * If the offset is not valid (0..length()-1) then U+ffff is returned.
1325    * @param offset an offset into the text; valid offsets are 0..length()-1
1326    * @return the code unit at offset <tt>offset</tt>
1327    *         or 0xffff if the offset is not valid for this string
1328    * @stable ICU 2.0
1329    */
1330   inline UChar charAt(int32_t offset) const;
1331
1332   /**
1333    * Return the code unit at offset <tt>offset</tt>.
1334    * If the offset is not valid (0..length()-1) then U+ffff is returned.
1335    * @param offset an offset into the text; valid offsets are 0..length()-1
1336    * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string
1337    * @stable ICU 2.0
1338    */
1339   inline UChar operator[] (int32_t offset) const;
1340
1341   /**
1342    * Return the code point that contains the code unit
1343    * at offset <tt>offset</tt>.
1344    * If the offset is not valid (0..length()-1) then U+ffff is returned.
1345    * @param offset a valid offset into the text
1346    * that indicates the text offset of any of the code units
1347    * that will be assembled into a code point (21-bit value) and returned
1348    * @return the code point of the text at <tt>offset</tt>,
1349    *         or 0xffff if the offset is not valid for this string
1350    * @stable ICU 2.0
1351    */
1352   UChar32 char32At(int32_t offset) const;
1353
1354   /**
1355    * Adjust a random-access offset so that
1356    * it points to the beginning of a Unicode character.
1357    * The offset that is passed in points to
1358    * any code unit of a code point,
1359    * while the returned offset will point to the first code unit
1360    * of the same code point.
1361    * In UTF-16, if the input offset points to the second surrogate
1362    * of a surrogate pair, then the returned offset will point
1363    * to the first surrogate of that pair.
1364    * @param offset a valid offset into one code point of the text
1365    * @return offset of the first code unit of the same code point
1366    * @see U16_SET_CP_START
1367    * @stable ICU 2.0
1368    */
1369   int32_t getChar32Start(int32_t offset) const;
1370
1371   /**
1372    * Adjust a random-access offset so that
1373    * it points behind a Unicode character.
1374    * The offset that is passed in points behind
1375    * any code unit of a code point,
1376    * while the returned offset will point behind the last code unit
1377    * of the same code point.
1378    * In UTF-16, if the input offset points behind the first surrogate
1379    * (i.e., to the second surrogate)
1380    * of a surrogate pair, then the returned offset will point
1381    * behind the second surrogate (i.e., to the first code unit after the pair).
1382    * @param offset a valid offset after any code unit of a code point of the text
1383    * @return offset of the first code unit after the same code point
1384    * @see U16_SET_CP_LIMIT
1385    * @stable ICU 2.0
1386    */
1387   int32_t getChar32Limit(int32_t offset) const;
1388
1389   /**
1390    * Move the code unit index along the string by delta code points.
1391    * Interpret the input index as a code unit-based offset into the string,
1392    * move the index forward or backward by delta code points, and
1393    * return the resulting index.
1394    * The input index should point to the first code unit of a code point,
1395    * if there is more than one.
1396    *
1397    * Both input and output indexes are code unit-based as for all
1398    * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1399    * If delta<0 then the index is moved backward (toward the start of the string).
1400    * If delta>0 then the index is moved forward (toward the end of the string).
1401    *
1402    * This behaves like CharacterIterator::move32(delta, kCurrent).
1403    *
1404    * Behavior for out-of-bounds indexes:
1405    * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1406    * if the input index<0 then it is pinned to 0;
1407    * if it is index>length() then it is pinned to length().
1408    * Afterwards, the index is moved by <code>delta</code> code points
1409    * forward or backward,
1410    * but no further backward than to 0 and no further forward than to length().
1411    * The resulting index return value will be in between 0 and length(), inclusively.
1412    *
1413    * Examples:
1414    * <pre>
1415    * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1416    * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1417    *
1418    * // initial index: position of U+10000
1419    * int32_t index=1;
1420    *
1421    * // the following examples will all result in index==4, position of U+10ffff
1422    *
1423    * // skip 2 code points from some position in the string
1424    * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1425    *
1426    * // go to the 3rd code point from the start of s (0-based)
1427    * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1428    *
1429    * // go to the next-to-last code point of s
1430    * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1431    * </pre>
1432    *
1433    * @param index input code unit index
1434    * @param delta (signed) code point count to move the index forward or backward
1435    *        in the string
1436    * @return the resulting code unit index
1437    * @stable ICU 2.0
1438    */
1439   int32_t moveIndex32(int32_t index, int32_t delta) const;
1440
1441   /* Substring extraction */
1442
1443   /**
1444    * Copy the characters in the range
1445    * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1446    * beginning at <tt>dstStart</tt>.
1447    * If the string aliases to <code>dst</code> itself as an external buffer,
1448    * then extract() will not copy the contents.
1449    *
1450    * @param start offset of first character which will be copied into the array
1451    * @param length the number of characters to extract
1452    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1453    * must be at least (<tt>dstStart + length</tt>).
1454    * @param dstStart the offset in <TT>dst</TT> where the first character
1455    * will be extracted
1456    * @stable ICU 2.0
1457    */
1458   inline void extract(int32_t start,
1459            int32_t length,
1460            UChar *dst,
1461            int32_t dstStart = 0) const;
1462
1463   /**
1464    * Copy the contents of the string into dest.
1465    * This is a convenience function that
1466    * checks if there is enough space in dest,
1467    * extracts the entire string if possible,
1468    * and NUL-terminates dest if possible.
1469    *
1470    * If the string fits into dest but cannot be NUL-terminated
1471    * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1472    * If the string itself does not fit into dest
1473    * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1474    *
1475    * If the string aliases to <code>dest</code> itself as an external buffer,
1476    * then extract() will not copy the contents.
1477    *
1478    * @param dest Destination string buffer.
1479    * @param destCapacity Number of UChars available at dest.
1480    * @param errorCode ICU error code.
1481    * @return length()
1482    * @stable ICU 2.0
1483    */
1484   int32_t
1485   extract(UChar *dest, int32_t destCapacity,
1486           UErrorCode &errorCode) const;
1487
1488   /**
1489    * Copy the characters in the range
1490    * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
1491    * <tt>target</tt>.
1492    * @param start offset of first character which will be copied
1493    * @param length the number of characters to extract
1494    * @param target UnicodeString into which to copy characters;
1495    *        the function itself returns nothing.
1496    * @stable ICU 2.0
1497    */
1498   inline void extract(int32_t start,
1499            int32_t length,
1500            UnicodeString& target) const;
1501
1502   /**
1503    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1504    * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1505    * @param start offset of first character which will be copied into the array
1506    * @param limit offset immediately following the last character to be copied
1507    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1508    * must be at least (<tt>dstStart + (limit - start)</tt>).
1509    * @param dstStart the offset in <TT>dst</TT> where the first character
1510    * will be extracted
1511    * @stable ICU 2.0
1512    */
1513   inline void extractBetween(int32_t start,
1514               int32_t limit,
1515               UChar *dst,
1516               int32_t dstStart = 0) const;
1517
1518   /**
1519    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1520    * into the UnicodeString <tt>target</tt>.  Replaceable API.
1521    * @param start offset of first character which will be copied
1522    * @param limit offset immediately following the last character to be copied
1523    * @param target UnicodeString into which to copy characters;
1524    *        the function itself returns nothing.
1525    * @stable ICU 2.0
1526    */
1527   virtual void extractBetween(int32_t start,
1528               int32_t limit,
1529               UnicodeString& target) const;
1530
1531   /**
1532    * Copy the characters in the range
1533    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters.
1534    * All characters must be invariant (see utypes.h).
1535    * Use US_INV as the last, signature-distinguishing parameter.
1536    *
1537    * This function does not write any more than <code>targetCapacity</code>
1538    * characters but returns the length of the entire output string
1539    * so that one can allocate a larger buffer and call the function again
1540    * if necessary.
1541    * The output string is NUL-terminated if possible.
1542    *
1543    * @param start offset of first character which will be copied
1544    * @param startLength the number of characters to extract
1545    * @param target the target buffer for extraction, can be NULL
1546    *               if targetCapacity is 0
1547    * @param targetCapacity the length of the target buffer
1548    * @param inv Signature-distinguishing parameter, use US_INV.
1549    * @return the output string length, not including the terminating NUL
1550    * @stable ICU 3.2
1551    */
1552   int32_t extract(int32_t start,
1553            int32_t startLength,
1554            char *target,
1555            int32_t targetCapacity,
1556            enum EInvariant inv) const;
1557
1558 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1559
1560   /**
1561    * Copy the characters in the range
1562    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1563    * in the platform's default codepage.
1564    * This function does not write any more than <code>targetLength</code>
1565    * characters but returns the length of the entire output string
1566    * so that one can allocate a larger buffer and call the function again
1567    * if necessary.
1568    * The output string is NUL-terminated if possible.
1569    *
1570    * @param start offset of first character which will be copied
1571    * @param startLength the number of characters to extract
1572    * @param target the target buffer for extraction
1573    * @param targetLength the length of the target buffer
1574    * If <TT>target</TT> is NULL, then the number of bytes required for
1575    * <TT>target</TT> is returned.
1576    * @return the output string length, not including the terminating NUL
1577    * @stable ICU 2.0
1578    */
1579   int32_t extract(int32_t start,
1580            int32_t startLength,
1581            char *target,
1582            uint32_t targetLength) const;
1583
1584 #endif
1585
1586 #if !UCONFIG_NO_CONVERSION
1587
1588   /**
1589    * Copy the characters in the range
1590    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1591    * in a specified codepage.
1592    * The output string is NUL-terminated.
1593    *
1594    * Recommendation: For invariant-character strings use
1595    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1596    * because it avoids object code dependencies of UnicodeString on
1597    * the conversion code.
1598    *
1599    * @param start offset of first character which will be copied
1600    * @param startLength the number of characters to extract
1601    * @param target the target buffer for extraction
1602    * @param codepage the desired codepage for the characters.  0 has
1603    * the special meaning of the default codepage
1604    * If <code>codepage</code> is an empty string (<code>""</code>),
1605    * then a simple conversion is performed on the codepage-invariant
1606    * subset ("invariant characters") of the platform encoding. See utypes.h.
1607    * If <TT>target</TT> is NULL, then the number of bytes required for
1608    * <TT>target</TT> is returned. It is assumed that the target is big enough
1609    * to fit all of the characters.
1610    * @return the output string length, not including the terminating NUL
1611    * @stable ICU 2.0
1612    */
1613   inline int32_t extract(int32_t start,
1614                  int32_t startLength,
1615                  char *target,
1616                  const char *codepage = 0) const;
1617
1618   /**
1619    * Copy the characters in the range
1620    * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1621    * in a specified codepage.
1622    * This function does not write any more than <code>targetLength</code>
1623    * characters but returns the length of the entire output string
1624    * so that one can allocate a larger buffer and call the function again
1625    * if necessary.
1626    * The output string is NUL-terminated if possible.
1627    *
1628    * Recommendation: For invariant-character strings use
1629    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1630    * because it avoids object code dependencies of UnicodeString on
1631    * the conversion code.
1632    *
1633    * @param start offset of first character which will be copied
1634    * @param startLength the number of characters to extract
1635    * @param target the target buffer for extraction
1636    * @param targetLength the length of the target buffer
1637    * @param codepage the desired codepage for the characters.  0 has
1638    * the special meaning of the default codepage
1639    * If <code>codepage</code> is an empty string (<code>""</code>),
1640    * then a simple conversion is performed on the codepage-invariant
1641    * subset ("invariant characters") of the platform encoding. See utypes.h.
1642    * If <TT>target</TT> is NULL, then the number of bytes required for
1643    * <TT>target</TT> is returned.
1644    * @return the output string length, not including the terminating NUL
1645    * @stable ICU 2.0
1646    */
1647   int32_t extract(int32_t start,
1648            int32_t startLength,
1649            char *target,
1650            uint32_t targetLength,
1651            const char *codepage) const;
1652
1653   /**
1654    * Convert the UnicodeString into a codepage string using an existing UConverter.
1655    * The output string is NUL-terminated if possible.
1656    *
1657    * This function avoids the overhead of opening and closing a converter if
1658    * multiple strings are extracted.
1659    *
1660    * @param dest destination string buffer, can be NULL if destCapacity==0
1661    * @param destCapacity the number of chars available at dest
1662    * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1663    *        or NULL for the default converter
1664    * @param errorCode normal ICU error code
1665    * @return the length of the output string, not counting the terminating NUL;
1666    *         if the length is greater than destCapacity, then the string will not fit
1667    *         and a buffer of the indicated length would need to be passed in
1668    * @stable ICU 2.0
1669    */
1670   int32_t extract(char *dest, int32_t destCapacity,
1671                   UConverter *cnv,
1672                   UErrorCode &errorCode) const;
1673
1674 #endif
1675
1676   /**
1677    * Create a temporary substring for the specified range.
1678    * Unlike the substring constructor and setTo() functions,
1679    * the object returned here will be a read-only alias (using getBuffer())
1680    * rather than copying the text.
1681    * As a result, this substring operation is much faster but requires
1682    * that the original string not be modified or deleted during the lifetime
1683    * of the returned substring object.
1684    * @param start offset of the first character visible in the substring
1685    * @param length length of the substring
1686    * @return a read-only alias UnicodeString object for the substring
1687    * @stable ICU 4.4
1688    */
1689   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1690
1691   /**
1692    * Create a temporary substring for the specified range.
1693    * Same as tempSubString(start, length) except that the substring range
1694    * is specified as a (start, limit) pair (with an exclusive limit index)
1695    * rather than a (start, length) pair.
1696    * @param start offset of the first character visible in the substring
1697    * @param limit offset immediately following the last character visible in the substring
1698    * @return a read-only alias UnicodeString object for the substring
1699    * @stable ICU 4.4
1700    */
1701   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1702
1703   /**
1704    * Convert the UnicodeString to UTF-8 and write the result
1705    * to a ByteSink. This is called by toUTF8String().
1706    * Unpaired surrogates are replaced with U+FFFD.
1707    * Calls u_strToUTF8WithSub().
1708    *
1709    * @param sink A ByteSink to which the UTF-8 version of the string is written.
1710    *             sink.Flush() is called at the end.
1711    * @stable ICU 4.2
1712    * @see toUTF8String
1713    */
1714   void toUTF8(ByteSink &sink) const;
1715
1716 #if U_HAVE_STD_STRING
1717
1718   /**
1719    * Convert the UnicodeString to UTF-8 and append the result
1720    * to a standard string.
1721    * Unpaired surrogates are replaced with U+FFFD.
1722    * Calls toUTF8().
1723    *
1724    * @param result A standard string (or a compatible object)
1725    *        to which the UTF-8 version of the string is appended.
1726    * @return The string object.
1727    * @stable ICU 4.2
1728    * @see toUTF8
1729    */
1730   template<typename StringClass>
1731   StringClass &toUTF8String(StringClass &result) const {
1732     StringByteSink<StringClass> utf8Sink(&result);  // ByteSink adapter wrapping result
1733     toUTF8(utf8Sink);  // toUTF8() writes the UTF-8 form of this string to the sink
1734     return result;
1735   }
1736
1737 #endif
1738
1739   /**
1740    * Convert the UnicodeString to UTF-32.
1741    * Unpaired surrogates are replaced with U+FFFD.
1742    * Calls u_strToUTF32WithSub().
1743    *
1744    * @param utf32 destination string buffer, can be NULL if capacity==0
1745    * @param capacity the number of UChar32s available at utf32
1746    * @param errorCode Standard ICU error code. Its input value must
1747    *                  pass the U_SUCCESS() test, or else the function returns
1748    *                  immediately. Check for U_FAILURE() on output or use with
1749    *                  function chaining. (See User Guide for details.)
1750    * @return The length of the UTF-32 string.
1751    * @see fromUTF32
1752    * @stable ICU 4.2
1753    */
1754   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1755
1756   /* Length operations */
1757
1758   /**
1759    * Return the length of the UnicodeString object.
1760    * The length is the number of UChar code units in the UnicodeString.
1761    * If you want the number of code points, please use countChar32().
1762    * @return the length of the UnicodeString object
1763    * @see countChar32
1764    * @stable ICU 2.0
1765    */
1766   inline int32_t length(void) const;
1767
1768   /**
1769    * Count Unicode code points in the length UChar code units of the string.
1770    * A code point may occupy either one or two UChar code units.
1771    * Counting code points involves reading all code units.
1772    *
1773    * This function is basically the inverse of moveIndex32().
1774    *
1775    * @param start the index of the first code unit to check
1776    * @param length the number of UChar code units to check
1777    * @return the number of code points in the specified code units
1778    * @see length
1779    * @stable ICU 2.0
1780    */
1781   int32_t
1782   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1783
1784   /**
1785    * Check if the length UChar code units of the string
1786    * contain more Unicode code points than a certain number.
1787    * This is more efficient than counting all code points in this part of the string
1788    * and comparing that number with a threshold.
1789    * This function may not need to scan the string at all if the length
1790    * falls within a certain range, and
1791    * never needs to count more than 'number+1' code points.
1792    * Logically equivalent to (countChar32(start, length)>number).
1793    * A Unicode code point may occupy either one or two UChar code units.
1794    *
1795    * @param start the index of the first code unit to check (0 for the entire string)
1796    * @param length the number of UChar code units to check
1797    *               (use INT32_MAX for the entire string; remember that start/length
1798    *                values are pinned)
1799    * @param number The number of code points in the (sub)string is compared against
1800    *               the 'number' parameter.
1801    * @return Boolean value for whether the string contains more Unicode code points
1802    *         than 'number'. Same as (u_countChar32(s, length)>number).
1803    * @see countChar32
1804    * @see u_strHasMoreChar32Than
1805    * @stable ICU 2.4
1806    */
1807   UBool
1808   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1809
1810   /**
1811    * Determine if this string is empty.
1812    * @return TRUE if this string contains 0 characters, FALSE otherwise.
1813    * @stable ICU 2.0
1814    */
1815   inline UBool isEmpty(void) const;
1816
1817   /**
1818    * Return the capacity of the internal buffer of the UnicodeString object.
1819    * This is useful together with the getBuffer functions.
1820    * See there for details.
1821    *
1822    * @return the number of UChars available in the internal buffer
1823    * @see getBuffer
1824    * @stable ICU 2.0
1825    */
1826   inline int32_t getCapacity(void) const;
1827
1828   /* Other operations */
1829
1830   /**
1831    * Generate a hash code for this object.
1832    * @return The hash code of this UnicodeString.
1833    * @stable ICU 2.0
1834    */
1835   inline int32_t hashCode(void) const;
1836
1837   /**
1838    * Determine if this object contains a valid string.
1839    * A bogus string has no value. It is different from an empty string,
1840    * although in both cases isEmpty() returns TRUE and length() returns 0.
1841    * setToBogus() and isBogus() can be used to indicate that no string value is available.
1842    * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1843    * length() returns 0.
1844    *
1845    * @return TRUE if the string is bogus/invalid, FALSE otherwise
1846    * @see setToBogus()
1847    * @stable ICU 2.0
1848    */
1849   inline UBool isBogus(void) const;
1850
1851
1852   //========================================
1853   // Write operations
1854   //========================================
1855
1856   /* Assignment operations */
1857
1858   /**
1859    * Assignment operator.  Replace the characters in this UnicodeString
1860    * with the characters from <TT>srcText</TT>.
1861    *
1862    * Starting with ICU 2.4, the assignment operator and the copy constructor
1863    * allocate a new buffer and copy the buffer contents even for readonly aliases.
1864    * By contrast, the fastCopyFrom() function implements the old,
1865    * more efficient but less safe behavior
1866    * of making this string also a readonly alias to the same buffer.
1867    *
1868    * If the source object has an "open" buffer from getBuffer(minCapacity),
1869    * then the copy is an empty string.
1870    *
1871    * @param srcText The text containing the characters to replace
1872    * @return a reference to this
1873    * @stable ICU 2.0
1874    * @see fastCopyFrom
1875    */
1876   UnicodeString &operator=(const UnicodeString &srcText);
1877
1878   /**
1879    * Almost the same as the assignment operator.
1880    * Replace the characters in this UnicodeString
1881    * with the characters from <code>srcText</code>.
1882    *
1883    * This function works the same as the assignment operator
1884    * for all strings except for ones that are readonly aliases.
1885    *
1886    * Starting with ICU 2.4, the assignment operator and the copy constructor
1887    * allocate a new buffer and copy the buffer contents even for readonly aliases.
1888    * This function implements the old, more efficient but less safe behavior
1889    * of making this string also a readonly alias to the same buffer.
1890    *
1891    * The fastCopyFrom function must be used only if it is known that the lifetime of
1892    * this UnicodeString does not exceed the lifetime of the aliased buffer
1893    * including its contents, for example for strings from resource bundles
1894    * or aliases to string constants.
1895    *
1896    * If the source object has an "open" buffer from getBuffer(minCapacity),
1897    * then the copy is an empty string.
1898    *
1899    * @param src The text containing the characters to replace.
1900    * @return a reference to this
1901    * @stable ICU 2.4
1902    */
1903   UnicodeString &fastCopyFrom(const UnicodeString &src);
1904
1905 #ifndef U_HIDE_DRAFT_API
1906 #if U_HAVE_RVALUE_REFERENCES
1907   /**
1908    * Move assignment operator, might leave src in bogus state.
1909    * This string will have the same contents and state that the source string had.
1910    * The behavior is undefined if *this and src are the same object.
1911    * @param src source string
1912    * @return *this
1913    * @draft ICU 56
1914    */
1915   UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT {
1916     return this->moveFrom(src);  // src is a named lvalue here, so it binds to moveFrom(UnicodeString &)
1917   }
1918 #endif
1919   /**
1920    * Move assignment, might leave src in bogus state.
1921    * This string will have the same contents and state that the source string had.
1922    * The behavior is undefined if *this and src are the same object.
1923    *
1924    * Can be called explicitly, does not need C++11 support.
1925    * @param src source string
1926    * @return *this
1927    * @draft ICU 56
1928    */
1929   UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT;
1930
1931   /**
1932    * Swap strings.
1933    * @param other other string
1934    * @draft ICU 56
1935    */
1936   void swap(UnicodeString &other) U_NOEXCEPT;
1937 #endif  /* U_HIDE_DRAFT_API */
1938
1939   /**
1940    * Non-member UnicodeString swap function.
1941    * @param s1 will get s2's contents and state
1942    * @param s2 will get s1's contents and state
1943    * @draft ICU 56
1944    */
1945   friend U_COMMON_API inline void U_EXPORT2
1946   swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
1947     s1.swap(s2);  // forwards to the member swap(); NOTE(review): that member is declared inside the U_HIDE_DRAFT_API guard but this friend is not -- confirm this builds with U_HIDE_DRAFT_API defined
1948   }
1949
1950   /**
1951    * Assignment operator.  Replace the characters in this UnicodeString
1952    * with the code unit <TT>ch</TT>.
1953    * @param ch the code unit to replace
1954    * @return a reference to this
1955    * @stable ICU 2.0
1956    */
1957   inline UnicodeString& operator= (UChar ch);
1958
1959   /**
1960    * Assignment operator.  Replace the characters in this UnicodeString
1961    * with the code point <TT>ch</TT>.
1962    * @param ch the code point to replace
1963    * @return a reference to this
1964    * @stable ICU 2.0
1965    */
1966   inline UnicodeString& operator= (UChar32 ch);
1967
1968   /**
1969    * Set the text in the UnicodeString object to the characters
1970    * in <TT>srcText</TT> in the range
1971    * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1972    * <TT>srcText</TT> is not modified.
1973    * @param srcText the source for the new characters
1974    * @param srcStart the offset into <TT>srcText</TT> where new characters
1975    * will be obtained
1976    * @return a reference to this
1977    * @stable ICU 2.2
1978    */
1979   inline UnicodeString& setTo(const UnicodeString& srcText,
1980                int32_t srcStart);
1981
1982   /**
1983    * Set the text in the UnicodeString object to the characters
1984    * in <TT>srcText</TT> in the range
1985    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1986    * <TT>srcText</TT> is not modified.
1987    * @param srcText the source for the new characters
1988    * @param srcStart the offset into <TT>srcText</TT> where new characters
1989    * will be obtained
1990    * @param srcLength the number of characters in <TT>srcText</TT> in the
1991    * replace string.
1992    * @return a reference to this
1993    * @stable ICU 2.0
1994    */
1995   inline UnicodeString& setTo(const UnicodeString& srcText,
1996                int32_t srcStart,
1997                int32_t srcLength);
1998
1999   /**
2000    * Set the text in the UnicodeString object to the characters in
2001    * <TT>srcText</TT>.
2002    * <TT>srcText</TT> is not modified.
2003    * @param srcText the source for the new characters
2004    * @return a reference to this
2005    * @stable ICU 2.0
2006    */
2007   inline UnicodeString& setTo(const UnicodeString& srcText);
2008
2009   /**
2010    * Set the characters in the UnicodeString object to the characters
2011    * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2012    * @param srcChars the source for the new characters
2013    * @param srcLength the number of Unicode characters in srcChars.
2014    * @return a reference to this
2015    * @stable ICU 2.0
2016    */
2017   inline UnicodeString& setTo(const UChar *srcChars,
2018                int32_t srcLength);
2019
2020   /**
2021    * Set the characters in the UnicodeString object to the code unit
2022    * <TT>srcChar</TT>.
2023    * @param srcChar the code unit which becomes the UnicodeString's character
2024    * content
2025    * @return a reference to this
2026    * @stable ICU 2.0
2027    */
2028   UnicodeString& setTo(UChar srcChar);
2029
2030   /**
2031    * Set the characters in the UnicodeString object to the code point
2032    * <TT>srcChar</TT>.
2033    * @param srcChar the code point which becomes the UnicodeString's character
2034    * content
2035    * @return a reference to this
2036    * @stable ICU 2.0
2037    */
2038   UnicodeString& setTo(UChar32 srcChar);
2039
2040   /**
2041    * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
2042    * The text will be used for the UnicodeString object, but
2043    * it will not be released when the UnicodeString is destroyed.
2044    * This has copy-on-write semantics:
2045    * When the string is modified, then the buffer is first copied into
2046    * newly allocated memory.
2047    * The aliased buffer is never modified.
2048    *
2049    * In an assignment to another UnicodeString, when using the copy constructor
2050    * or the assignment operator, the text will be copied.
2051    * When using fastCopyFrom(), the text will be aliased again,
2052    * so that both strings then alias the same readonly-text.
2053    *
2054    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2055    *                     This must be true if <code>textLength==-1</code>.
2056    * @param text The characters to alias for the UnicodeString.
2057    * @param textLength The number of Unicode characters in <code>text</code> to alias.
2058    *                   If -1, then this function will determine the length
2059    *                   by calling <code>u_strlen()</code>.
2060    * @return a reference to this
2061    * @stable ICU 2.0
2062    */
2063   UnicodeString &setTo(UBool isTerminated,
2064                        const UChar *text,
2065                        int32_t textLength);
2066
2067   /**
2068    * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
2069    * The text will be used for the UnicodeString object, but
2070    * it will not be released when the UnicodeString is destroyed.
2071    * This has write-through semantics:
2072    * For as long as the capacity of the buffer is sufficient, write operations
2073    * will directly affect the buffer. When more capacity is necessary, then
2074    * a new buffer will be allocated and the contents copied as with regularly
2075    * constructed strings.
2076    * In an assignment to another UnicodeString, the buffer will be copied.
2077    * The extract(UChar *dst) function detects whether the dst pointer is the same
2078    * as the string buffer itself and will in this case not copy the contents.
2079    *
2080    * @param buffer The characters to alias for the UnicodeString.
2081    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2082    * @param buffCapacity The size of <code>buffer</code> in UChars.
2083    * @return a reference to this
2084    * @stable ICU 2.0
2085    */
2086   UnicodeString &setTo(UChar *buffer,
2087                        int32_t buffLength,
2088                        int32_t buffCapacity);
2089
2090   /**
2091    * Make this UnicodeString object invalid.
2092    * The string will test TRUE with isBogus().
2093    *
2094    * A bogus string has no value. It is different from an empty string.
2095    * It can be used to indicate that no string value is available.
2096    * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
2097    * length() returns 0.
2098    *
2099    * This utility function is used throughout the UnicodeString
2100    * implementation to indicate that a UnicodeString operation failed,
2101    * and may be used in other functions,
2102    * especially but not exclusively when such functions do not
2103    * take a UErrorCode for simplicity.
2104    *
2105    * The following methods, and no others, will clear a string object's bogus flag:
2106    * - remove()
2107    * - remove(0, INT32_MAX)
2108    * - truncate(0)
2109    * - operator=() (assignment operator)
2110    * - setTo(...)
2111    *
2112    * The simplest way to turn a bogus string into an empty one
2113    * is to use the remove() function.
2114    * Examples of other functions that are equivalent to "set to empty string":
2115    * \code
2116    * if(s.isBogus()) {
2117    *   s.remove();           // set to an empty string (remove all), or
2118    *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2119    *   s.truncate(0);        // set to an empty string (complete truncation), or
2120    *   s=UnicodeString();    // assign an empty string, or
2121    *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2122    *   static const UChar nul=0;
2123    *   s.setTo(&nul, 0);     // set to an empty C Unicode string
2124    * }
2125    * \endcode
2126    *
2127    * @see isBogus()
2128    * @stable ICU 2.0
2129    */
2130   void setToBogus();
2131
2132   /**
2133    * Set the code unit at the specified offset to the specified code unit.
2134    * @param offset A valid offset into the text of the code unit to set
2135    * @param ch The new code unit
2136    * @return A reference to this
2137    * @stable ICU 2.0
2138    */
2139   UnicodeString& setCharAt(int32_t offset,
2140                UChar ch);
2141
2142
2143   /* Append operations */
2144
2145   /**
2146    * Append operator. Append the code unit <TT>ch</TT> to the end of this
2147    * UnicodeString object.
2148    * @param ch the code unit to be appended
2149    * @return a reference to this
2150    * @stable ICU 2.0
2151    */
2152  inline  UnicodeString& operator+= (UChar ch);
2153
2154   /**
2155    * Append operator. Append the code point <TT>ch</TT> to the end of this
2156    * UnicodeString object.
2157    * @param ch the code point to be appended
2158    * @return a reference to this
2159    * @stable ICU 2.0
2160    */
2161  inline  UnicodeString& operator+= (UChar32 ch);
2162
2163   /**
2164    * Append operator. Append the characters in <TT>srcText</TT> to the end of
2165    * this UnicodeString object. <TT>srcText</TT> is not modified.
2166    * @param srcText the source for the new characters
2167    * @return a reference to this
2168    * @stable ICU 2.0
2169    */
2170   inline UnicodeString& operator+= (const UnicodeString& srcText);
2171
2172   /**
2173    * Append the characters
2174    * in <TT>srcText</TT> in the range
2175    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
2176    * end of this UnicodeString object. <TT>srcText</TT>
2177    * is not modified.
2178    * @param srcText the source for the new characters
2179    * @param srcStart the offset into <TT>srcText</TT> where new characters
2180    * will be obtained
2181    * @param srcLength the number of characters in <TT>srcText</TT> in
2182    * the append string
2183    * @return a reference to this
2184    * @stable ICU 2.0
2185    */
2186   inline UnicodeString& append(const UnicodeString& srcText,
2187             int32_t srcStart,
2188             int32_t srcLength);
2189
2190   /**
2191    * Append the characters in <TT>srcText</TT> to the end of this UnicodeString object.
2192    * <TT>srcText</TT> is not modified.
2193    * @param srcText the source for the new characters
2194    * @return a reference to this
2195    * @stable ICU 2.0
2196    */
2197   inline UnicodeString& append(const UnicodeString& srcText);
2198
2199   /**
2200    * Append the characters in <TT>srcChars</TT> in the range
2201    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of this
2202    * UnicodeString object.
2203    * <TT>srcChars</TT> is not modified.
2204    * @param srcChars the source for the new characters
2205    * @param srcStart the offset into <TT>srcChars</TT> where new characters
2206    * will be obtained
2207    * @param srcLength the number of characters in <TT>srcChars</TT> in
2208    *                  the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
2209    * @return a reference to this
2210    * @stable ICU 2.0
2211    */
2212   inline UnicodeString& append(const UChar *srcChars,
2213             int32_t srcStart,
2214             int32_t srcLength);
2215
2216   /**
2217    * Append the characters in <TT>srcChars</TT> to the end of this
2218    * UnicodeString object. <TT>srcChars</TT> is not modified.
2219    * @param srcChars the source for the new characters
2220    * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
2221    *                  can be -1 if <TT>srcChars</TT> is NUL-terminated
2222    * @return a reference to this
2223    * @stable ICU 2.0
2224    */
2225   inline UnicodeString& append(const UChar *srcChars,
2226             int32_t srcLength);
2227
2228   /**
2229    * Append the code unit <TT>srcChar</TT> to the end of this UnicodeString object.
2230    * @param srcChar the code unit to append
2231    * @return a reference to this
2232    * @stable ICU 2.0
2233    */
2234   inline UnicodeString& append(UChar srcChar);
2235
2236   /**
2237    * Append the code point <TT>srcChar</TT> to the end of this UnicodeString object.
2238    * @param srcChar the code point to append
2239    * @return a reference to this
2240    * @stable ICU 2.0
2241    */
2242   UnicodeString& append(UChar32 srcChar);
2243
2244
2245   /* Insert operations */
2246
2247   /**
2248    * Insert the characters in <TT>srcText</TT> in the range
2249    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into this UnicodeString
2250    * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2251    * @param start the offset in this string where the insertion begins
2252    * @param srcText the source for the new characters
2253    * @param srcStart the offset into <TT>srcText</TT> where new characters
2254    * will be obtained
2255    * @param srcLength the number of characters in <TT>srcText</TT> in
2256    * the insert string
2257    * @return a reference to this
2258    * @stable ICU 2.0
2259    */
2260   inline UnicodeString& insert(int32_t start,
2261             const UnicodeString& srcText,
2262             int32_t srcStart,
2263             int32_t srcLength);
2264
2265   /**
2266    * Insert the characters in <TT>srcText</TT> into this UnicodeString object
2267    * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2268    * @param start the offset in this string where the insertion begins
2269    * @param srcText the source for the new characters
2270    * @return a reference to this
2271    * @stable ICU 2.0
2272    */
2273   inline UnicodeString& insert(int32_t start,
2274             const UnicodeString& srcText);
2275
2276   /**
2277    * Insert the characters in <TT>srcChars</TT> in the range
2278    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into this UnicodeString
2279    * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2280    * @param start the offset in this string at which the insertion begins
2281    * @param srcChars the source for the new characters
2282    * @param srcStart the offset into <TT>srcChars</TT> where new characters
2283    * will be obtained
2284    * @param srcLength the number of characters in <TT>srcChars</TT>
2285    * in the insert string
2286    * @return a reference to this
2287    * @stable ICU 2.0
2288    */
2289   inline UnicodeString& insert(int32_t start,
2290             const UChar *srcChars,
2291             int32_t srcStart,
2292             int32_t srcLength);
2293
2294   /**
2295    * Insert the characters in <TT>srcChars</TT> into this UnicodeString object
2296    * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2297    * @param start the offset in this string where the insertion begins
2298    * @param srcChars the source for the new characters
2299    * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
2300    * @return a reference to this
2301    * @stable ICU 2.0
2302    */
2303   inline UnicodeString& insert(int32_t start,
2304             const UChar *srcChars,
2305             int32_t srcLength);
2306
2307   /**
2308    * Insert the code unit <TT>srcChar</TT> into this UnicodeString object at
2309    * offset <TT>start</TT>.
2310    * @param start the offset in this string at which the insertion occurs
2311    * @param srcChar the code unit to insert
2312    * @return a reference to this
2313    * @stable ICU 2.0
2314    */
2315   inline UnicodeString& insert(int32_t start,
2316             UChar srcChar);
2317
2318   /**
2319    * Insert the code point <TT>srcChar</TT> into this UnicodeString object at
2320    * offset <TT>start</TT>.
2321    * @param start the offset in this string at which the insertion occurs
2322    * @param srcChar the code point to insert
2323    * @return a reference to this
2324    * @stable ICU 2.0
2325    */
2326   inline UnicodeString& insert(int32_t start,
2327             UChar32 srcChar);
2328
2329
2330   /* Replace operations */
2331
2332   /**
2333    * Replace the characters of this string in the range
2334    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2335    * <TT>srcText</TT> in the range
2336    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2337    * <TT>srcText</TT> is not modified.
2338    * @param start the offset at which the replace operation begins
2339    * @param length the number of characters to replace. The character at
2340    * <TT>start + length</TT> is not modified.
2341    * @param srcText the source for the new characters
2342    * @param srcStart the offset into <TT>srcText</TT> where new characters
2343    * will be obtained
2344    * @param srcLength the number of characters in <TT>srcText</TT> in
2345    * the replace string
2346    * @return a reference to this
2347    * @stable ICU 2.0
2348    */
2349   UnicodeString& replace(int32_t start,
2350              int32_t length,
2351              const UnicodeString& srcText,
2352              int32_t srcStart,
2353              int32_t srcLength);
2354
2355   /**
2356    * Replace the characters of this string in the range
2357    * [<TT>start</TT>, <TT>start + length</TT>)
2358    * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
2359    * not modified.
2360    * @param start the offset at which the replace operation begins
2361    * @param length the number of characters to replace. The character at
2362    * <TT>start + length</TT> is not modified.
2363    * @param srcText the source for the new characters
2364    * @return a reference to this
2365    * @stable ICU 2.0
2366    */
2367   UnicodeString& replace(int32_t start,
2368              int32_t length,
2369              const UnicodeString& srcText);
2370
2371   /**
2372    * Replace the characters of this string in the range
2373    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2374    * <TT>srcChars</TT> in the range
2375    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2376    * is not modified.
2377    * @param start the offset at which the replace operation begins
2378    * @param length the number of characters to replace. The character at
2379    * <TT>start + length</TT> is not modified.
2380    * @param srcChars the source for the new characters
2381    * @param srcStart the offset into <TT>srcChars</TT> where new characters
2382    * will be obtained
2383    * @param srcLength the number of characters in <TT>srcChars</TT>
2384    * in the replace string
2385    * @return a reference to this
2386    * @stable ICU 2.0
2387    */
2388   UnicodeString& replace(int32_t start,
2389              int32_t length,
2390              const UChar *srcChars,
2391              int32_t srcStart,
2392              int32_t srcLength);
2393
2394   /**
2395    * Replace the characters of this string in the range
2396    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2397    * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2398    * @param start the offset at which the replace operation begins
2399    * @param length the number of characters to replace. The character at
2400    * <TT>start + length</TT> is not modified.
2401    * @param srcChars the source for the new characters
2402    * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
2403    * @return a reference to this
2404    * @stable ICU 2.0
2405    */
2406   inline UnicodeString& replace(int32_t start,
2407              int32_t length,
2408              const UChar *srcChars,
2409              int32_t srcLength);
2410
2411   /**
2412    * Replace the characters of this string in the range
2413    * [<TT>start</TT>, <TT>start + length</TT>) with the single code unit
2414    * <TT>srcChar</TT>.
2415    * @param start the offset at which the replace operation begins
2416    * @param length the number of characters to replace. The character at
2417    * <TT>start + length</TT> is not modified.
2418    * @param srcChar the new code unit
2419    * @return a reference to this
2420    * @stable ICU 2.0
2421    */
2422   inline UnicodeString& replace(int32_t start,
2423              int32_t length,
2424              UChar srcChar);
2425
2426   /**
2427    * Replace the characters of this string in the range
2428    * [<TT>start</TT>, <TT>start + length</TT>) with the single code point
2429    * <TT>srcChar</TT>.
2430    * @param start the offset at which the replace operation begins
2431    * @param length the number of characters to replace. The character at
2432    * <TT>start + length</TT> is not modified.
2433    * @param srcChar the new code point
2434    * @return a reference to this
2435    * @stable ICU 2.0
2436    */
2437   UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2438
2439   /**
2440    * Replace the characters of this string in the range [<TT>start</TT>, <TT>limit</TT>)
2441    * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2442    * @param start the offset at which the replace operation begins
2443    * @param limit the offset immediately following the replace range
2444    * @param srcText the source for the new characters
2445    * @return a reference to this
2446    * @stable ICU 2.0
2447    */
2448   inline UnicodeString& replaceBetween(int32_t start,
2449                 int32_t limit,
2450                 const UnicodeString& srcText);
2451
2452   /**
2453    * Replace the characters of this string in the range [<TT>start</TT>, <TT>limit</TT>)
2454    * with the characters in <TT>srcText</TT> in the range
2455    * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2456    * @param start the offset at which the replace operation begins
2457    * @param limit the offset immediately following the replace range
2458    * @param srcText the source for the new characters
2459    * @param srcStart the offset into <TT>srcText</TT> where new characters
2460    * will be obtained
2461    * @param srcLimit the offset immediately following the range to copy
2462    * in <TT>srcText</TT>
2463    * @return a reference to this
2464    * @stable ICU 2.0
2465    */
2466   inline UnicodeString& replaceBetween(int32_t start,
2467                 int32_t limit,
2468                 const UnicodeString& srcText,
2469                 int32_t srcStart,
2470                 int32_t srcLimit);
2471
2472   /**
2473    * Replaceable API: replace a substring of this object with the given text.
2474    * @param start the beginning index, inclusive; <code>0 <= start
2475    * <= limit</code>.
2476    * @param limit the ending index, exclusive; <code>start <= limit
2477    * <= length()</code>.
2478    * @param text the text that replaces the characters <code>start</code>
2479    * to <code>limit - 1</code>
2480    * @stable ICU 2.0
2481    */
2482   virtual void handleReplaceBetween(int32_t start,
2483                                     int32_t limit,
2484                                     const UnicodeString& text);
2485
2486   /**
2487    * Replaceable API: report whether this object carries metadata.
2488    * @return TRUE if this object has metadata
2489    * @stable ICU 2.4
2490    */
2491   virtual UBool hasMetaData() const;
2492
2493   /**
2494    * Copy a substring of this object, retaining attribute (out-of-band)
2495    * information.  This method is used to duplicate or reorder substrings.
2496    * The destination index must not overlap the source range.
2497    *
2498    * @param start the beginning index, inclusive; <code>0 <= start <=
2499    * limit</code>.
2500    * @param limit the ending index, exclusive; <code>start <= limit <=
2501    * length()</code>.
2502    * @param dest the destination index.  The characters in the range
2503    * <code>start..limit-1</code> will be copied to <code>dest</code>.
2504    * Implementations of this method may assume that <code>dest <= start ||
2505    * dest >= limit</code>.
2506    * @stable ICU 2.0
2507    */
2508   virtual void copy(int32_t start, int32_t limit, int32_t dest);
2509
2510   /* Search and replace operations */
2511
2512   /**
2513    * Replace all occurrences of the characters in oldText with the characters
2514    * in newText.
2515    * @param oldText the text containing the search text
2516    * @param newText the text containing the replacement text
2517    * @return a reference to this
2518    * @stable ICU 2.0
2519    */
2520   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2521                 const UnicodeString& newText);
2522
2523   /**
2524    * Replace all occurrences of the characters in oldText with the characters
2525    * in newText
2526    * in the range [<TT>start</TT>, <TT>start + length</TT>) of this string.
2527    * @param start the start of the range in which replace will be performed
2528    * @param length the length of the range in which replace will be performed
2529    * @param oldText the text containing the search text
2530    * @param newText the text containing the replacement text
2531    * @return a reference to this
2532    * @stable ICU 2.0
2533    */
2534   inline UnicodeString& findAndReplace(int32_t start,
2535                 int32_t length,
2536                 const UnicodeString& oldText,
2537                 const UnicodeString& newText);
2538
2539   /**
2540    * Replace all occurrences of the characters in oldText in the range
2541    * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2542    * in newText in the range
2543    * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
2544    * in the range [<TT>start</TT>, <TT>start + length</TT>) of this string.
2545    * @param start the start of the range in which replace will be performed
2546    * @param length the length of the range in which replace will be performed
2547    * @param oldText the text containing the search text
2548    * @param oldStart the start of the search range in <TT>oldText</TT>
2549    * @param oldLength the length of the search range in <TT>oldText</TT>
2550    * @param newText the text containing the replacement text
2551    * @param newStart the start of the replacement range in <TT>newText</TT>
2552    * @param newLength the length of the replacement range in <TT>newText</TT>
2553    * @return a reference to this
2554    * @stable ICU 2.0
2555    */
2556   UnicodeString& findAndReplace(int32_t start,
2557                 int32_t length,
2558                 const UnicodeString& oldText,
2559                 int32_t oldStart,
2560                 int32_t oldLength,
2561                 const UnicodeString& newText,
2562                 int32_t newStart,
2563                 int32_t newLength);
2564
2565
2566   /* Remove operations */
2567
2568   /**
2569    * Remove all characters from this UnicodeString object; this also clears a bogus state (see setToBogus()).
2570    * @return a reference to this
2571    * @stable ICU 2.0
2572    */
2573   inline UnicodeString& remove(void);
2574
2575   /**
2576    * Remove the characters in the range
2577    * [<TT>start</TT>, <TT>start + length</TT>) from this UnicodeString object.
2578    * @param start the offset of the first character to remove
2579    * @param length the number of characters to remove; defaults to INT32_MAX
2580    * @return a reference to this
2581    * @stable ICU 2.0
2582    */
2583   inline UnicodeString& remove(int32_t start,
2584                                int32_t length = (int32_t)INT32_MAX);
2585
2586   /**
2587    * Remove the characters in the range
2588    * [<TT>start</TT>, <TT>limit</TT>) from this UnicodeString object.
2589    * @param start the offset of the first character to remove
2590    * @param limit the offset immediately following the range to remove; defaults to INT32_MAX
2591    * @return a reference to this
2592    * @stable ICU 2.0
2593    */
2594   inline UnicodeString& removeBetween(int32_t start,
2595                                       int32_t limit = (int32_t)INT32_MAX);
2596
2597   /**
2598    * Retain only the characters in the range
2599    * [<code>start</code>, <code>limit</code>) of this UnicodeString object.
2600    * Removes characters before <code>start</code> and at and after <code>limit</code>.
2601    * @param start the offset of the first character to retain
2602    * @param limit the offset immediately following the range to retain; defaults to INT32_MAX
2603    * @return a reference to this
2604    * @stable ICU 4.4
2605    */
2606   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2607
2608   /* Length operations */
2609
2610   /**
2611    * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2612    * If the length of this UnicodeString is less than targetLength,
2613    * targetLength - length() copies of padChar will be added to the
2614    * beginning of this UnicodeString.
2615    * @param targetLength the desired length of the string
2616    * @param padChar the character to use for padding. Defaults to
2617    * space (U+0020)
2618    * @return TRUE if the text was padded, FALSE otherwise.
2619    * @stable ICU 2.0
2620    */
2621   UBool padLeading(int32_t targetLength,
2622                     UChar padChar = 0x0020);
2623
2624   /**
2625    * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2626    * If the length of this UnicodeString is less than targetLength,
2627    * targetLength - length() copies of padChar will be added to the
2628    * end of this UnicodeString.
2629    * @param targetLength the desired length of the string
2630    * @param padChar the character to use for padding. Defaults to
2631    * space (U+0020)
2632    * @return TRUE if the text was padded, FALSE otherwise.
2633    * @stable ICU 2.0
2634    */
2635   UBool padTrailing(int32_t targetLength,
2636                      UChar padChar = 0x0020);
2637
2638   /**
2639    * Truncate this UnicodeString to the length <TT>targetLength</TT>.
2640    * @param targetLength the desired length of this UnicodeString.
2641    * @return TRUE if the text was truncated, FALSE otherwise
2642    * @stable ICU 2.0
2643    */
2644   inline UBool truncate(int32_t targetLength);
2645
2646   /**
2647    * Trim leading and trailing whitespace from this UnicodeString.
2648    * @return a reference to this
2649    * @stable ICU 2.0
2650    */
2651   UnicodeString& trim(void);
2652
2653
2654   /* Miscellaneous operations */
2655
2656   /**
2657    * Reverse the contents of this UnicodeString in place.
2658    * @return a reference to this
2659    * @stable ICU 2.0
2660    */
2661   inline UnicodeString& reverse(void);
2662
2663   /**
2664    * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2665    * this UnicodeString.
2666    * @param start the start of the range to reverse
2667    * @param length the number of characters to reverse
2668    * @return a reference to this
2669    * @stable ICU 2.0
2670    */
2671   inline UnicodeString& reverse(int32_t start,
2672              int32_t length);
2673
2674   /**
2675    * Convert the characters in this string to upper case following the conventions of
2676    * the default locale.
2677    * @return A reference to this.
2678    * @stable ICU 2.0
2679    */
2680   UnicodeString& toUpper(void);
2681
2682   /**
2683    * Convert the characters in this string to upper case following the conventions of
2684    * a specific locale.
2685    * @param locale The locale containing the conventions to use.
2686    * @return A reference to this.
2687    * @stable ICU 2.0
2688    */
2689   UnicodeString& toUpper(const Locale& locale);
2690
2691   /**
2692    * Convert the characters in this string to lower case following the conventions of
2693    * the default locale.
2694    * @return A reference to this.
2695    * @stable ICU 2.0
2696    */
2697   UnicodeString& toLower(void);
2698
2699   /**
2700    * Convert the characters in this string to lower case following the conventions of
2701    * a specific locale.
2702    * @param locale The locale containing the conventions to use.
2703    * @return A reference to this.
2704    * @stable ICU 2.0
2705    */
2706   UnicodeString& toLower(const Locale& locale);
2707
2708 #if !UCONFIG_NO_BREAK_ITERATION
2709
2710   /**
2711    * Titlecase this string; convenience function that uses the default locale.
2712    *
2713    * Casing is locale-dependent and context-sensitive.
2714    * Titlecasing uses a break iterator to find the first characters of words
2715    * that are to be titlecased. It titlecases those characters and lowercases
2716    * all others.
2717    *
2718    * The titlecase break iterator can be provided to customize for arbitrary
2719    * styles, using rules and dictionaries beyond the standard iterators.
2720    * It may be more efficient to always provide an iterator to avoid
2721    * opening and closing one for each string.
2722    * The standard titlecase iterator for the root locale implements the
2723    * algorithm of Unicode TR 21.
2724    *
2725    * This function uses only the setText(), first() and next() methods of the
2726    * provided break iterator.
2727    *
2728    * @param titleIter A break iterator to find the first characters of words
2729    *                  that are to be titlecased.
2730    *                  If none is provided (0), then a standard titlecase
2731    *                  break iterator is opened.
2732    *                  Otherwise the provided iterator is set to this string's text.
2733    * @return A reference to this.
2734    * @stable ICU 2.1
2735    */
2736   UnicodeString &toTitle(BreakIterator *titleIter);
2737
2738   /**
2739    * Titlecase this string, following the conventions of a specific locale.
2740    *
2741    * Casing is locale-dependent and context-sensitive.
2742    * Titlecasing uses a break iterator to find the first characters of words
2743    * that are to be titlecased. It titlecases those characters and lowercases
2744    * all others.
2745    *
2746    * The titlecase break iterator can be provided to customize for arbitrary
2747    * styles, using rules and dictionaries beyond the standard iterators.
2748    * It may be more efficient to always provide an iterator to avoid
2749    * opening and closing one for each string.
2750    * The standard titlecase iterator for the root locale implements the
2751    * algorithm of Unicode TR 21.
2752    *
2753    * This function uses only the setText(), first() and next() methods of the
2754    * provided break iterator.
2755    *
2756    * @param titleIter A break iterator to find the first characters of words
2757    *                  that are to be titlecased.
2758    *                  If none is provided (0), then a standard titlecase
2759    *                  break iterator is opened.
2760    *                  Otherwise the provided iterator is set to this string's text.
2761    * @param locale    The locale to consider.
2762    * @return A reference to this.
2763    * @stable ICU 2.1
2764    */
2765   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2766
2767   /**
2768    * Titlecase this string, with options.
2769    *
2770    * Casing is locale-dependent and context-sensitive.
2771    * Titlecasing uses a break iterator to find the first characters of words
2772    * that are to be titlecased. It titlecases those characters and lowercases
2773    * all others. (This can be modified with options.)
2774    *
2775    * The titlecase break iterator can be provided to customize for arbitrary
2776    * styles, using rules and dictionaries beyond the standard iterators.
2777    * It may be more efficient to always provide an iterator to avoid
2778    * opening and closing one for each string.
2779    * The standard titlecase iterator for the root locale implements the
2780    * algorithm of Unicode TR 21.
2781    *
2782    * This function uses only the setText(), first() and next() methods of the
2783    * provided break iterator.
2784    *
2785    * @param titleIter A break iterator to find the first characters of words
2786    *                  that are to be titlecased.
2787    *                  If none is provided (0), then a standard titlecase
2788    *                  break iterator is opened.
2789    *                  Otherwise the provided iterator is set to this string's text.
2790    * @param locale    The locale to consider.
2791    * @param options   Options bit set, see ucasemap_open().
2792    * @return A reference to this.
2793    * @see U_TITLECASE_NO_LOWERCASE
2794    * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
2795    * @see ucasemap_open
2796    * @stable ICU 3.8
2797    */
2798   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2799
2800 #endif
2801
2802   /**
2803    * Case-folds the characters in this string.
2804    *
2805    * Case-folding is locale-independent and not context-sensitive,
2806    * but there is an option for whether to include or exclude mappings for dotted I
2807    * and dotless i that are marked with 'T' in CaseFolding.txt.
2808    *
2809    * The result may be longer or shorter than the original.
2810    *
2811    * @param options Either U_FOLD_CASE_DEFAULT (the default) or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2812    * @return A reference to this.
2813    * @stable ICU 2.0
2814    */
2815   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2816
2817   //========================================
2818   // Access to the internal buffer
2819   //========================================
2820
2821   /**
2822    * Get a read/write pointer to the internal buffer.
2823    * The buffer is guaranteed to be large enough for at least minCapacity UChars,
2824    * writable, and is still owned by the UnicodeString object.
2825    * Calls to getBuffer(minCapacity) must not be nested, and
2826    * must be matched with calls to releaseBuffer(newLength).
2827    * If the string buffer was read-only or shared,
2828    * then it will be reallocated and copied.
2829    *
2830    * An attempted nested call will return 0, and will not further modify the
2831    * state of the UnicodeString object.
2832    * It also returns 0 if the string is bogus.
2833    *
2834    * The actual capacity of the string buffer may be larger than minCapacity.
2835    * getCapacity() returns the actual capacity.
2836    * For many operations, the full capacity should be used to avoid reallocations.
2837    *
2838    * While the buffer is "open" between getBuffer(minCapacity)
2839    * and releaseBuffer(newLength), the following applies:
2840    * - The string length is set to 0.
2841    * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2842    * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2843    * - You can read from and write to the returned buffer.
2844    * - The previous string contents will still be in the buffer;
2845    *   if you want to use them, then you need to call length() before getBuffer(minCapacity).
2846    *   If the length() was greater than minCapacity, then any contents after minCapacity
2847    *   may be lost.
2848    *   The buffer contents are not NUL-terminated by getBuffer().
2849    *   If length()<getCapacity() then you can terminate them by writing a NUL
2850    *   at index length().
2851    * - You must call releaseBuffer(newLength) in order to
2852    *   return to normal UnicodeString operation.
2853    *
2854    * @param minCapacity the minimum number of UChars that are to be available
2855    *        in the buffer, starting at the returned pointer;
2856    *        default to the current string capacity if minCapacity==-1
2857    * @return a writable pointer to the internal string buffer,
2858    *         or 0 if an error occurs (nested calls, bogus string, out of memory)
2859    *
2860    * @see releaseBuffer
2861    * @see getTerminatedBuffer()
2862    * @stable ICU 2.0
2863    */
2864   UChar *getBuffer(int32_t minCapacity);
2865
2866   /**
2867    * Release a read/write buffer on a UnicodeString object with an
2868    * "open" getBuffer(minCapacity).
2869    * This function must be called in a matched pair with getBuffer(minCapacity).
2870    * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2871    *
2872    * It will set the string length to newLength, but to no more than the current capacity.
2873    * If newLength==-1 then it will set the length according to the
2874    * first NUL in the buffer, or to the capacity if there is no NUL.
2875    *
2876    * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2877    *
2878    * @param newLength the new length of the UnicodeString object;
2879    *        it is limited to the current capacity if newLength is greater than that;
2880    *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
2881    *        the current capacity of the string
2882    *
2883    * @see getBuffer(int32_t minCapacity)
2884    * @stable ICU 2.0
2885    */
2886   void releaseBuffer(int32_t newLength=-1);
2887
2888   /**
2889    * Get a read-only pointer to the internal buffer.
2890    * This can be called at any time on a valid UnicodeString.
2891    *
2892    * It returns 0 if the string is bogus, or
2893    * during an "open" getBuffer(minCapacity).
2894    *
2895    * It can be called as many times as desired.
2896    * The pointer that it returns will remain valid until the UnicodeString object is modified,
2897    * at which time the pointer is semantically invalidated and must not be used any more.
2898    *
2899    * The capacity of the buffer can be determined with getCapacity().
2900    * The part after length() may or may not be initialized and valid,
2901    * depending on the history of the UnicodeString object.
2902    *
2903    * The buffer contents are (probably) not NUL-terminated.
2904    * You can check if they are with
2905    * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2906    * (See getTerminatedBuffer().)
2907    *
2908    * The buffer may reside in read-only memory. Its contents must not
2909    * be modified.
2910    *
2911    * @return a read-only pointer to the internal string buffer,
2912    *         or 0 if the string is bogus or has an "open" getBuffer(minCapacity)
2913    *
2914    * @see getBuffer(int32_t minCapacity)
2915    * @see getTerminatedBuffer()
2916    * @stable ICU 2.0
2917    */
2918   inline const UChar *getBuffer() const;
2919
2920   /**
2921    * Get a read-only pointer to the internal buffer,
2922    * making sure that it is NUL-terminated.
2923    * This can be called at any time on a valid UnicodeString.
2924    *
2925    * It returns 0 if the string is bogus, or
2926    * during an "open" getBuffer(minCapacity), or if the buffer cannot
2927    * be NUL-terminated (because memory allocation failed).
2928    *
2929    * It can be called as many times as desired.
2930    * The pointer that it returns will remain valid until the UnicodeString object is modified,
2931    * at which time the pointer is semantically invalidated and must not be used any more.
2932    *
2933    * The capacity of the buffer can be determined with getCapacity().
2934    * The part after length()+1 may or may not be initialized and valid,
2935    * depending on the history of the UnicodeString object.
2936    *
2937    * The buffer contents are guaranteed to be NUL-terminated.
2938    * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2939    * is written.
2940    * For this reason, this function is not const, unlike getBuffer().
2941    * Note that a UnicodeString may also contain NUL characters as part of its contents.
2942    *
2943    * The buffer may reside in read-only memory. Its contents must not
2944    * be modified.
2945    *
2946    * @return a read-only pointer to the NUL-terminated internal string buffer, or 0 if the
2947    *         string is bogus, has an "open" getBuffer(minCapacity), or memory allocation failed
2948    *
2949    * @see getBuffer(int32_t minCapacity)
2950    * @see getBuffer()
2951    * @stable ICU 2.2
2952    */
2953   const UChar *getTerminatedBuffer();
2954
2955   //========================================
2956   // Constructors
2957   //========================================
2958
2959   /** Construct an empty UnicodeString.
2960    * @stable ICU 2.0
2961    */
2962   inline UnicodeString();
2963
2964   /**
2965    * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
2966    * @param capacity the number of UChars this UnicodeString should hold
2967    * before a resize is necessary; if count is greater than 0 and count
2968    * code points c take up more space than capacity, then capacity is adjusted
2969    * accordingly.
2970    * @param c is used to initially fill the string
2971    * @param count specifies how many code points c are to be written in the
2972    *              string
2973    * @stable ICU 2.0
2974    */
2975   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2976
2977   /**
2978    * Single UChar (code unit) constructor.
2979    *
2980    * It is recommended to mark this constructor "explicit" by
2981    * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2982    * on the compiler command line or similar.
2983    * @param ch the character to place in the UnicodeString
2984    * @stable ICU 2.0
2985    */
2986   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
2987
2988   /**
2989    * Single UChar32 (code point) constructor.
2990    *
2991    * It is recommended to mark this constructor "explicit" by
2992    * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2993    * on the compiler command line or similar.
2994    * @param ch the character to place in the UnicodeString
2995    * @stable ICU 2.0
2996    */
2997   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
2998
2999   /**
3000    * UChar* constructor.
3001    *
3002    * It is recommended to mark this constructor "explicit" by
3003    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3004    * on the compiler command line or similar.
3005    * @param text The characters to place in the UnicodeString.  <TT>text</TT>
3006    * must be NUL (U+0000) terminated.
3007    * @stable ICU 2.0
3008    */
3009   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
3010
3011   /**
3012    * UChar* constructor.
3013    * @param text The characters to place in the UnicodeString.
3014    * @param textLength The number of Unicode characters in <TT>text</TT>
3015    * to copy.
3016    * @stable ICU 2.0
3017    */
3018   UnicodeString(const UChar *text,
3019         int32_t textLength);
3020
3021   /**
3022    * Readonly-aliasing UChar* constructor.
3023    * The text will be used for the UnicodeString object, but
3024    * it will not be released when the UnicodeString is destroyed.
3025    * This has copy-on-write semantics:
3026    * When the string is modified, then the buffer is first copied into
3027    * newly allocated memory.
3028    * The aliased buffer is never modified.
3029    *
3030    * In an assignment to another UnicodeString, when using the copy constructor
3031    * or the assignment operator, the text will be copied.
3032    * When using fastCopyFrom(), the text will be aliased again,
3033    * so that both strings then alias the same readonly-text.
3034    *
3035    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
3036    *                     This must be true if <code>textLength==-1</code>.
3037    * @param text The characters to alias for the UnicodeString.
3038    * @param textLength The number of Unicode characters in <code>text</code> to alias.
3039    *                   If -1, then this constructor will determine the length
3040    *                   by calling <code>u_strlen()</code>.
3041    * @stable ICU 2.0
3042    */
3043   UnicodeString(UBool isTerminated,
3044                 const UChar *text,
3045                 int32_t textLength);
3046
3047   /**
3048    * Writable-aliasing UChar* constructor.
3049    * The text will be used for the UnicodeString object, but
3050    * it will not be released when the UnicodeString is destroyed.
3051    * This has write-through semantics:
3052    * For as long as the capacity of the buffer is sufficient, write operations
3053    * will directly affect the buffer. When more capacity is necessary, then
3054    * a new buffer will be allocated and the contents copied as with regularly
3055    * constructed strings.
3056    * In an assignment to another UnicodeString, the buffer will be copied.
3057    * The extract(UChar *dst) function detects whether the dst pointer is the same
3058    * as the string buffer itself and will in this case not copy the contents.
3059    *
3060    * @param buffer The characters to alias for the UnicodeString.
3061    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
3062    * @param buffCapacity The size of <code>buffer</code> in UChars.
3063    * @stable ICU 2.0
3064    */
3065   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
3066
3067 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3068
3069   /**
3070    * char* constructor.
3071    * Uses the default converter (and thus depends on the ICU conversion code)
3072    * unless U_CHARSET_IS_UTF8 is set to 1.
3073    *
3074    * For ASCII (really "invariant character") strings it is more efficient to use
3075    * the constructor that takes a US_INV (for its enum EInvariant).
3076    * For ASCII (invariant-character) string literals, see UNICODE_STRING and
3077    * UNICODE_STRING_SIMPLE.
3078    *
3079    * It is recommended to mark this constructor "explicit" by
3080    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3081    * on the compiler command line or similar.
3082    * @param codepageData an array of bytes, NUL-terminated,
3083    *                     in the platform's default codepage.
3084    * @stable ICU 2.0
3085    * @see UNICODE_STRING
3086    * @see UNICODE_STRING_SIMPLE
3087    */
3088   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3089
3090   /**
3091    * char* constructor.
3092    * Uses the default converter (and thus depends on the ICU conversion code)
3093    * unless U_CHARSET_IS_UTF8 is set to 1.
3094    * @param codepageData an array of bytes in the platform's default codepage.
3095    * @param dataLength The number of bytes in <TT>codepageData</TT>.
3096    * @stable ICU 2.0
3097    */
3098   UnicodeString(const char *codepageData, int32_t dataLength);
3099
3100 #endif
3101
3102 #if !UCONFIG_NO_CONVERSION
3103
3104   /**
3105    * char* constructor.
3106    * @param codepageData an array of bytes, NUL-terminated.
3107    * @param codepage the encoding of <TT>codepageData</TT>.  The special
3108    * value 0 for <TT>codepage</TT> indicates that the text is in the
3109    * platform's default codepage.
3110    *
3111    * If <code>codepage</code> is an empty string (<code>""</code>),
3112    * then a simple conversion is performed on the codepage-invariant
3113    * subset ("invariant characters") of the platform encoding. See utypes.h.
3114    * Recommendation: For invariant-character strings use the constructor
3115    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3116    * because it avoids object code dependencies of UnicodeString on
3117    * the conversion code.
3118    *
3119    * @stable ICU 2.0
3120    */
3121   UnicodeString(const char *codepageData, const char *codepage);
3122
3123   /**
3124    * char* constructor.
3125    * @param codepageData an array of bytes.
3126    * @param dataLength The number of bytes in <TT>codepageData</TT>.
3127    * @param codepage the encoding of <TT>codepageData</TT>.  The special
3128    * value 0 for <TT>codepage</TT> indicates that the text is in the
3129    * platform's default codepage.
3130    * If <code>codepage</code> is an empty string (<code>""</code>),
3131    * then a simple conversion is performed on the codepage-invariant
3132    * subset ("invariant characters") of the platform encoding. See utypes.h.
3133    * Recommendation: For invariant-character strings use the constructor
3134    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3135    * because it avoids object code dependencies of UnicodeString on
3136    * the conversion code.
3137    *
3138    * @stable ICU 2.0
3139    */
3140   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3141
3142   /**
3143    * char * / UConverter constructor.
3144    * This constructor uses an existing UConverter object to
3145    * convert the codepage string to Unicode and construct a UnicodeString
3146    * from that.
3147    *
3148    * The converter is reset at first.
3149    * If the error code indicates a failure before this constructor is called,
3150    * or if an error occurs during conversion or construction,
3151    * then the string will be bogus.
3152    *
3153    * This function avoids the overhead of opening and closing a converter if
3154    * multiple strings are constructed.
3155    *
3156    * @param src input codepage string
3157    * @param srcLength length of the input string, can be -1 for NUL-terminated strings
3158    * @param cnv converter object (ucnv_resetToUnicode() will be called),
3159    *        can be NULL for the default converter
3160    * @param errorCode normal ICU error code
3161    * @stable ICU 2.0
3162    */
3163   UnicodeString(
3164         const char *src, int32_t srcLength,
3165         UConverter *cnv,
3166         UErrorCode &errorCode);
3167
3168 #endif
3169
3170   /**
3171    * Constructs a Unicode string from an invariant-character char * string.
3172    * About invariant characters see utypes.h.
3173    * This constructor has no runtime dependency on conversion code and is
3174    * therefore recommended over ones taking a charset name string
3175    * (where the empty string "" indicates invariant-character conversion).
3176    *
3177    * Use the macro US_INV as the third, signature-distinguishing parameter.
3178    *
3179    * For example:
3180    * \code
3181    * void fn(const char *s) {
3182    *   UnicodeString ustr(s, -1, US_INV);
3183    *   // use ustr ...
3184    * }
3185    * \endcode
3186    *
3187    * @param src String using only invariant characters.
3188    * @param length Length of src, or -1 if NUL-terminated.
3189    * @param inv Signature-distinguishing parameter, use US_INV.
3190    *
3191    * @see US_INV
3192    * @stable ICU 3.2
3193    */
3194   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
3195
3196
3197   /**
3198    * Copy constructor.
3199    *
3200    * Starting with ICU 2.4, the assignment operator and the copy constructor
3201    * allocate a new buffer and copy the buffer contents even for readonly aliases.
3202    * By contrast, the fastCopyFrom() function implements the old,
3203    * more efficient but less safe behavior
3204    * of making this string also a readonly alias to the same buffer.
3205    *
3206    * If the source object has an "open" buffer from getBuffer(minCapacity),
3207    * then the copy is an empty string.
3208    *
3209    * @param that The UnicodeString object to copy.
3210    * @stable ICU 2.0
3211    * @see fastCopyFrom
3212    */
3213   UnicodeString(const UnicodeString& that);
3214
3215 #ifndef U_HIDE_DRAFT_API
3216 #if U_HAVE_RVALUE_REFERENCES
3217   /**
3218    * Move constructor, might leave src in bogus state.
3219    * This string will have the same contents and state that the source string had.
3220    * @param src source string
3221    * @draft ICU 56
3222    */
3223   UnicodeString(UnicodeString &&src) U_NOEXCEPT;
3224 #endif
3225 #endif  /* U_HIDE_DRAFT_API */
3226
3227   /**
3228    * 'Substring' constructor from tail of source string.
3229    * @param src The UnicodeString object to copy.
3230    * @param srcStart The offset into <tt>src</tt> at which to start copying.
3231    * @stable ICU 2.2
3232    */
3233   UnicodeString(const UnicodeString& src, int32_t srcStart);
3234
3235   /**
3236    * 'Substring' constructor from subrange of source string.
3237    * @param src The UnicodeString object to copy.
3238    * @param srcStart The offset into <tt>src</tt> at which to start copying.
3239    * @param srcLength The number of characters from <tt>src</tt> to copy.
3240    * @stable ICU 2.2
3241    */
3242   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3243
3244   /**
3245    * Clone this object, an instance of a subclass of Replaceable.
3246    * Clones can be used concurrently in multiple threads.
3247    * If a subclass does not implement clone(), or if an error occurs,
3248    * then NULL is returned.
3249    * The clone functions in all subclasses return a pointer to a Replaceable
3250    * because some compilers do not support covariant (same-as-this)
3251    * return types; cast to the appropriate subclass if necessary.
3252    * The caller must delete the clone.
3253    *
3254    * @return a clone of this object
3255    *
3256    * @see Replaceable::clone
3257    * @see getDynamicClassID
3258    * @stable ICU 2.6
3259    */
3260   virtual Replaceable *clone() const;
3261
3262   /** Destructor.
3263    * @stable ICU 2.0
3264    */
3265   virtual ~UnicodeString();
3266
3267   /**
3268    * Create a UnicodeString from a UTF-8 string.
3269    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3270    * Calls u_strFromUTF8WithSub().
3271    *
3272    * @param utf8 UTF-8 input string.
3273    *             Note that a StringPiece can be implicitly constructed
3274    *             from a std::string or a NUL-terminated const char * string.
3275    * @return A UnicodeString with equivalent UTF-16 contents.
3276    * @see toUTF8
3277    * @see toUTF8String
3278    * @stable ICU 4.2
3279    */
3280   static UnicodeString fromUTF8(const StringPiece &utf8);
3281
3282   /**
3283    * Create a UnicodeString from a UTF-32 string.
3284    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3285    * Calls u_strFromUTF32WithSub().
3286    *
3287    * @param utf32 UTF-32 input string. Must not be NULL.
3288    * @param length Length of the input string, or -1 if NUL-terminated.
3289    * @return A UnicodeString with equivalent UTF-16 contents.
3290    * @see toUTF32
3291    * @stable ICU 4.2
3292    */
3293   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3294
3295   /* Miscellaneous operations */
3296
3297   /**
3298    * Unescape a string of characters and return a string containing
3299    * the result.  The following escape sequences are recognized:
3300    *
3301    * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
3302    * \\Uhhhhhhhh   8 hex digits
3303    * \\xhh         1-2 hex digits
3304    * \\ooo         1-3 octal digits; o in [0-7]
3305    * \\cX          control-X; X is masked with 0x1F
3306    *
3307    * as well as the standard ANSI C escapes:
3308    *
3309    * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3310    * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3311    * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3312    *
3313    * Anything else following a backslash is generically escaped.  For
3314    * example, "[a\\-z]" returns "[a-z]".
3315    *
3316    * If an escape sequence is ill-formed, this method returns an empty
3317    * string.  An example of an ill-formed sequence is "\\u" followed by
3318    * fewer than 4 hex digits.
3319    *
3320    * This function is similar to u_unescape() but not identical to it.
3321    * The latter takes a source char*, so it does escape recognition
3322    * and also invariant conversion.
3323    *
3324    * @return a string with backslash escapes interpreted, or an
3325    * empty string on error.
3326    * @see UnicodeString#unescapeAt()
3327    * @see u_unescape()
3328    * @see u_unescapeAt()
3329    * @stable ICU 2.0
3330    */
3331   UnicodeString unescape() const;
3332
3333   /**
3334    * Unescape a single escape sequence and return the represented
3335    * character.  See unescape() for a listing of the recognized escape
3336    * sequences.  The character at offset-1 is assumed (without
3337    * checking) to be a backslash.  If the escape sequence is
3338    * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
3339    * returned.
3340    *
3341    * @param offset an input/output parameter.  On input, it is the
3342    * offset into this string where the escape sequence is located,
3343    * after the initial backslash.  On output, it is advanced after the
3344    * last character parsed.  On error, it is not advanced at all.
3345    * @return the character represented by the escape sequence at
3346    * offset, or U_SENTINEL=-1 on error.
3347    * @see UnicodeString#unescape()
3348    * @see u_unescape()
3349    * @see u_unescapeAt()
3350    * @stable ICU 2.0
3351    */
3352   UChar32 unescapeAt(int32_t &offset) const;
3353
3354   /**
3355    * ICU "poor man's RTTI", returns a UClassID for this class.
3356    *
3357    * @stable ICU 2.2
3358    */
3359   static UClassID U_EXPORT2 getStaticClassID();
3360
3361   /**
3362    * ICU "poor man's RTTI", returns a UClassID for the actual class.
3363    *
3364    * @stable ICU 2.2
3365    */
3366   virtual UClassID getDynamicClassID() const;
3367
3368   //========================================
3369   // Implementation methods
3370   //========================================
3371
3372 protected:
3373   /**
3374    * Implement Replaceable::getLength() (see jitterbug 1027).
3375    * @stable ICU 2.4
3376    */
3377   virtual int32_t getLength() const;
3378
3379   /**
3380    * The change in Replaceable to use virtual getCharAt() allows
3381    * UnicodeString::charAt() to be inline again (see jitterbug 709).
3382    * @stable ICU 2.4
3383    */
3384   virtual UChar getCharAt(int32_t offset) const;
3385
3386   /**
3387    * The change in Replaceable to use virtual getChar32At() allows
3388    * UnicodeString::char32At() to be inline again (see jitterbug 709).
3389    * @stable ICU 2.4
3390    */
3391   virtual UChar32 getChar32At(int32_t offset) const;
3392
3393 private:
3394   // For char* constructors. Could be made public.
3395   UnicodeString &setToUTF8(const StringPiece &utf8);
3396   // For extract(char*).
3397   // We could make a toUTF8(target, capacity, errorCode) public but not
3398   // this version: New API will be cleaner if we make callers create substrings
3399   // rather than having start+length on every method,
3400   // and it should take a UErrorCode&.
3401   int32_t
3402   toUTF8(int32_t start, int32_t len,
3403          char *target, int32_t capacity) const;
3404
3405   /**
3406    * Internal string contents comparison, called by operator==.
3407    * Requires: this & text not bogus and have same lengths.
3408    */
3409   UBool doEquals(const UnicodeString &text, int32_t len) const;
3410
3411   inline int8_t
3412   doCompare(int32_t start,
3413            int32_t length,
3414            const UnicodeString& srcText,
3415            int32_t srcStart,
3416            int32_t srcLength) const;
3417
3418   int8_t doCompare(int32_t start,
3419            int32_t length,
3420            const UChar *srcChars,
3421            int32_t srcStart,
3422            int32_t srcLength) const;
3423
3424   inline int8_t
3425   doCompareCodePointOrder(int32_t start,
3426                           int32_t length,
3427                           const UnicodeString& srcText,
3428                           int32_t srcStart,
3429                           int32_t srcLength) const;
3430
3431   int8_t doCompareCodePointOrder(int32_t start,
3432                                  int32_t length,
3433                                  const UChar *srcChars,
3434                                  int32_t srcStart,
3435                                  int32_t srcLength) const;
3436
3437   inline int8_t
3438   doCaseCompare(int32_t start,
3439                 int32_t length,
3440                 const UnicodeString &srcText,
3441                 int32_t srcStart,
3442                 int32_t srcLength,
3443                 uint32_t options) const;
3444
3445   int8_t
3446   doCaseCompare(int32_t start,
3447                 int32_t length,
3448                 const UChar *srcChars,
3449                 int32_t srcStart,
3450                 int32_t srcLength,
3451                 uint32_t options) const;
3452
3453   int32_t doIndexOf(UChar c,
3454             int32_t start,
3455             int32_t length) const;
3456
3457   int32_t doIndexOf(UChar32 c,
3458                         int32_t start,
3459                         int32_t length) const;
3460
3461   int32_t doLastIndexOf(UChar c,
3462                 int32_t start,
3463                 int32_t length) const;
3464
3465   int32_t doLastIndexOf(UChar32 c,
3466                             int32_t start,
3467                             int32_t length) const;
3468
3469   void doExtract(int32_t start,
3470          int32_t length,
3471          UChar *dst,
3472          int32_t dstStart) const;
3473
3474   inline void doExtract(int32_t start,
3475          int32_t length,
3476          UnicodeString& target) const;
3477
3478   inline UChar doCharAt(int32_t offset)  const;
3479
3480   UnicodeString& doReplace(int32_t start,
3481                int32_t length,
3482                const UnicodeString& srcText,
3483                int32_t srcStart,
3484                int32_t srcLength);
3485
3486   UnicodeString& doReplace(int32_t start,
3487                int32_t length,
3488                const UChar *srcChars,
3489                int32_t srcStart,
3490                int32_t srcLength);
3491
3492   UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3493   UnicodeString& doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength);
3494
3495   UnicodeString& doReverse(int32_t start,
3496                int32_t length);
3497
3498   // calculate hash code
3499   int32_t doHashCode(void) const;
3500
3501   // get pointer to start of array
3502   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3503   inline UChar* getArrayStart(void);
3504   inline const UChar* getArrayStart(void) const;
3505
3506   inline UBool hasShortLength() const;
3507   inline int32_t getShortLength() const;
3508
3509   // A UnicodeString object (not necessarily its current buffer)
3510   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3511   inline UBool isWritable() const;
3512
3513   // Is the current buffer writable?
3514   inline UBool isBufferWritable() const;
3515
3516   // None of the following does releaseArray().
3517   inline void setZeroLength();
3518   inline void setShortLength(int32_t len);
3519   inline void setLength(int32_t len);
3520   inline void setToEmpty();
3521   inline void setArray(UChar *array, int32_t len, int32_t capacity); // sets length but not flags
3522
3523   // allocate the array; result may be the stack buffer
3524   // sets refCount to 1 if appropriate
3525   // sets fArray, fCapacity, and flags
3526   // sets length to 0
3527   // returns boolean for success or failure
3528   UBool allocate(int32_t capacity);
3529
3530   // release the array if owned
3531   void releaseArray(void);
3532
3533   // turn a bogus string into an empty one
3534   void unBogus();
3535
3536   // implements the assignment operator, copy constructor, and fastCopyFrom()
3537   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3538
3539   // Copies just the fields without memory management.
3540   void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
3541
3542   // Pin start and limit to acceptable values.
3543   inline void pinIndex(int32_t& start) const;
3544   inline void pinIndices(int32_t& start,
3545                          int32_t& length) const;
3546
3547 #if !UCONFIG_NO_CONVERSION
3548
3549   /* Internal extract() using UConverter. */
3550   int32_t doExtract(int32_t start, int32_t length,
3551                     char *dest, int32_t destCapacity,
3552                     UConverter *cnv,
3553                     UErrorCode &errorCode) const;
3554
3555   /*
3556    * Real constructor for converting from codepage data.
3557    * It assumes that it is called with !fRefCounted.
3558    *
3559    * If <code>codepage==0</code>, then the default converter
3560    * is used for the platform encoding.
3561    * If <code>codepage</code> is an empty string (<code>""</code>),
3562    * then a simple conversion is performed on the codepage-invariant
3563    * subset ("invariant characters") of the platform encoding. See utypes.h.
3564    */
3565   void doCodepageCreate(const char *codepageData,
3566                         int32_t dataLength,
3567                         const char *codepage);
3568
3569   /*
3570    * Worker function for creating a UnicodeString from
3571    * a codepage string using a UConverter.
3572    */
3573   void
3574   doCodepageCreate(const char *codepageData,
3575                    int32_t dataLength,
3576                    UConverter *converter,
3577                    UErrorCode &status);
3578
3579 #endif
3580
3581   /*
3582    * This function is called when write access to the array
3583    * is necessary.
3584    *
3585    * We need to make a copy of the array if
3586    * the buffer is read-only, or
3587    * the buffer is refCounted (shared), and refCount>1, or
3588    * the buffer is too small.
3589    *
3590    * Return FALSE if memory could not be allocated.
3591    */
3592   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3593                             int32_t growCapacity = -1,
3594                             UBool doCopyArray = TRUE,
3595                             int32_t **pBufferToDelete = 0,
3596                             UBool forceClone = FALSE);
3597
3598   /**
3599    * Common function for UnicodeString case mappings.
3600    * The stringCaseMapper has the same type UStringCaseMapper
3601    * as in ustr_imp.h for ustrcase_map().
3602    */
3603   UnicodeString &
3604   caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
3605
3606   // ref counting
3607   void addRef(void);
3608   int32_t removeRef(void);
3609   int32_t refCount(void) const;
3610
  // constants
  enum {
    /**
     * Size of stack buffer for short strings, in UChars.
     * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
     * Derived from the object size minus one pointer (presumably the implicit vtable
     * pointer) and minus the 2 bytes of fLengthAndFlags.
     * @see UNISTR_OBJECT_SIZE
     */
    US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
    kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
    kGrowSize=128, // grow size for this buffer
    kInvalidHashCode=0, // invalid hash code
    kEmptyHashCode=1, // hash code for empty string

    // bit flag values for fLengthAndFlags (the low kLengthShift bits)
    kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
    kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
    kRefCounted=4,      // there is a refCount field before the characters in fArray
    kBufferIsReadonly=8,// do not write to this buffer
    kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
                        // and releaseBuffer(newLength) must be called
    kAllStorageFlags=0x1f, // mask covering all five flag bits above

    // The 11 bits of the int16_t above the flags hold the short length;
    // a negative fLengthAndFlags means that the length is stored in fUnion.fFields.fLength.
    kLengthShift=5,     // number of flag bits below the short length
    kLength1=1<<kLengthShift, // a short length of 1, in fLengthAndFlags position
    kMaxShortLength=0x3ff,  // max non-negative short length (leaves top bit 0)
    kLengthIsLarge=0xffe0,  // short length < 0, real length is in fUnion.fFields.fLength

    // combined values for convenience
    kShortString=kUsingStackBuffer,
    kLongString=kRefCounted,
    kReadonlyAlias=kBufferIsReadonly,
    kWritableAlias=0
  };
3644
3645   friend class UnicodeStringAppendable;
3646
3647   union StackBufferOrFields;        // forward declaration necessary before friend declaration
3648   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3649
3650   /*
3651    * The following are all the class fields that are stored
3652    * in each UnicodeString object.
3653    * Note that UnicodeString has virtual functions,
3654    * therefore there is an implicit vtable pointer
3655    * as the first real field.
3656    * The fields should be aligned such that no padding is necessary.
3657    * On 32-bit machines, the size should be 32 bytes,
3658    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3659    *
3660    * We use a hack to achieve this.
3661    *
3662    * With at least some compilers, each of the following is forced to
3663    * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3664    * rounded up with additional padding if the fields do not already fit that requirement:
3665    * - sizeof(class UnicodeString)
3666    * - offsetof(UnicodeString, fUnion)
3667    * - sizeof(fUnion)
3668    * - sizeof(fStackFields)
3669    *
3670    * We optimize for the longest possible internal buffer for short strings.
3671    * fUnion.fStackFields begins with 2 bytes for storage flags
3672    * and the length of relatively short strings,
3673    * followed by the buffer for short string contents.
3674    * There is no padding inside fStackFields.
3675    *
3676    * Heap-allocated and aliased strings use fUnion.fFields.
3677    * Both fStackFields and fFields must begin with the same fields for flags and short length,
3678    * that is, those must have the same memory offsets inside the object,
3679    * because the flags must be inspected in order to decide which half of fUnion is being used.
3680    * We assume that the compiler does not reorder the fields.
3681    *
3682    * (Padding at the end of fFields is ok:
3683    * As long as it is no larger than fStackFields, it is not wasted space.)
3684    *
3685    * For some of the history of the UnicodeString class fields layout, see
3686    * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
3687    * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
3688    * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
3689    */
3690   // (implicit) *vtable;
  union StackBufferOrFields {
    // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
    // Each struct of the union must begin with fLengthAndFlags.
    struct {
      int16_t fLengthAndFlags;          // bit fields: see constants above
      UChar fBuffer[US_STACKBUF_SIZE];  // buffer for short strings
    } fStackFields;
    struct {
      int16_t fLengthAndFlags;          // bit fields: see constants above
      // Number of characters in fArray if the length does not fit into fLengthAndFlags,
      // that is, if it is greater than kMaxShortLength; else undefined.
      int32_t fLength;
      int32_t fCapacity;  // capacity of fArray (in UChars)
      // array pointer last to minimize padding for machines with P128 data model
      // or pointer sizes that are not a power of 2
      UChar   *fArray;    // the Unicode data
    } fFields;
  } fUnion;
3707 };
3708
3709 /**
3710  * Create a new UnicodeString with the concatenation of two others.
3711  *
3712  * @param s1 The first string to be copied to the new one.
3713  * @param s2 The second string to be copied to the new one, after s1.
3714  * @return UnicodeString(s1).append(s2)
3715  * @stable ICU 2.8
3716  */
3717 U_COMMON_API UnicodeString U_EXPORT2
3718 operator+ (const UnicodeString &s1, const UnicodeString &s2);
3719
3720 //========================================
3721 // Inline members
3722 //========================================
3723
3724 //========================================
3725 // Privates
3726 //========================================
3727
3728 inline void
3729 UnicodeString::pinIndex(int32_t& start) const
3730 {
3731   // pin index
3732   if(start < 0) {
3733     start = 0;
3734   } else if(start > length()) {
3735     start = length();
3736   }
3737 }
3738
3739 inline void
3740 UnicodeString::pinIndices(int32_t& start,
3741                           int32_t& _length) const
3742 {
3743   // pin indices
3744   int32_t len = length();
3745   if(start < 0) {
3746     start = 0;
3747   } else if(start > len) {
3748     start = len;
3749   }
3750   if(_length < 0) {
3751     _length = 0;
3752   } else if(_length > (len - start)) {
3753     _length = (len - start);
3754   }
3755 }
3756
3757 inline UChar*
3758 UnicodeString::getArrayStart() {
3759   return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3760     fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3761 }
3762
3763 inline const UChar*
3764 UnicodeString::getArrayStart() const {
3765   return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3766     fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3767 }
3768
3769 //========================================
3770 // Default constructor
3771 //========================================
3772
3773 inline
3774 UnicodeString::UnicodeString() {
3775   fUnion.fStackFields.fLengthAndFlags=kShortString;
3776 }
3777
3778 //========================================
3779 // Read-only implementation methods
3780 //========================================
3781 inline UBool
3782 UnicodeString::hasShortLength() const {
3783   return fUnion.fFields.fLengthAndFlags>=0;
3784 }
3785
3786 inline int32_t
3787 UnicodeString::getShortLength() const {
3788   // fLengthAndFlags must be non-negative -> short length >= 0
3789   // and arithmetic or logical shift does not matter.
3790   return fUnion.fFields.fLengthAndFlags>>kLengthShift;
3791 }
3792
3793 inline int32_t
3794 UnicodeString::length() const {
3795   return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
3796 }
3797
3798 inline int32_t
3799 UnicodeString::getCapacity() const {
3800   return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3801     US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
3802 }
3803
3804 inline int32_t
3805 UnicodeString::hashCode() const
3806 { return doHashCode(); }
3807
3808 inline UBool
3809 UnicodeString::isBogus() const
3810 { return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
3811
3812 inline UBool
3813 UnicodeString::isWritable() const
3814 { return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); }
3815
3816 inline UBool
3817 UnicodeString::isBufferWritable() const
3818 {
3819   return (UBool)(
3820       !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3821       (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1));
3822 }
3823
3824 inline const UChar *
3825 UnicodeString::getBuffer() const {
3826   if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
3827     return 0;
3828   } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
3829     return fUnion.fStackFields.fBuffer;
3830   } else {
3831     return fUnion.fFields.fArray;
3832   }
3833 }
3834
3835 //========================================
3836 // Read-only alias methods
3837 //========================================
3838 inline int8_t
3839 UnicodeString::doCompare(int32_t start,
3840               int32_t thisLength,
3841               const UnicodeString& srcText,
3842               int32_t srcStart,
3843               int32_t srcLength) const
3844 {
3845   if(srcText.isBogus()) {
3846     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3847   } else {
3848     srcText.pinIndices(srcStart, srcLength);
3849     return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3850   }
3851 }
3852
3853 inline UBool
3854 UnicodeString::operator== (const UnicodeString& text) const
3855 {
3856   if(isBogus()) {
3857     return text.isBogus();
3858   } else {
3859     int32_t len = length(), textLength = text.length();
3860     return !text.isBogus() && len == textLength && doEquals(text, len);
3861   }
3862 }
3863
3864 inline UBool
3865 UnicodeString::operator!= (const UnicodeString& text) const
3866 { return (! operator==(text)); }
3867
3868 inline UBool
3869 UnicodeString::operator> (const UnicodeString& text) const
3870 { return doCompare(0, length(), text, 0, text.length()) == 1; }
3871
3872 inline UBool
3873 UnicodeString::operator< (const UnicodeString& text) const
3874 { return doCompare(0, length(), text, 0, text.length()) == -1; }
3875
3876 inline UBool
3877 UnicodeString::operator>= (const UnicodeString& text) const
3878 { return doCompare(0, length(), text, 0, text.length()) != -1; }
3879
3880 inline UBool
3881 UnicodeString::operator<= (const UnicodeString& text) const
3882 { return doCompare(0, length(), text, 0, text.length()) != 1; }
3883
3884 inline int8_t
3885 UnicodeString::compare(const UnicodeString& text) const
3886 { return doCompare(0, length(), text, 0, text.length()); }
3887
3888 inline int8_t
3889 UnicodeString::compare(int32_t start,
3890                int32_t _length,
3891                const UnicodeString& srcText) const
3892 { return doCompare(start, _length, srcText, 0, srcText.length()); }
3893
3894 inline int8_t
3895 UnicodeString::compare(const UChar *srcChars,
3896                int32_t srcLength) const
3897 { return doCompare(0, length(), srcChars, 0, srcLength); }
3898
3899 inline int8_t
3900 UnicodeString::compare(int32_t start,
3901                int32_t _length,
3902                const UnicodeString& srcText,
3903                int32_t srcStart,
3904                int32_t srcLength) const
3905 { return doCompare(start, _length, srcText, srcStart, srcLength); }
3906
3907 inline int8_t
3908 UnicodeString::compare(int32_t start,
3909                int32_t _length,
3910                const UChar *srcChars) const
3911 { return doCompare(start, _length, srcChars, 0, _length); }
3912
3913 inline int8_t
3914 UnicodeString::compare(int32_t start,
3915                int32_t _length,
3916                const UChar *srcChars,
3917                int32_t srcStart,
3918                int32_t srcLength) const
3919 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
3920
3921 inline int8_t
3922 UnicodeString::compareBetween(int32_t start,
3923                   int32_t limit,
3924                   const UnicodeString& srcText,
3925                   int32_t srcStart,
3926                   int32_t srcLimit) const
3927 { return doCompare(start, limit - start,
3928            srcText, srcStart, srcLimit - srcStart); }
3929
3930 inline int8_t
3931 UnicodeString::doCompareCodePointOrder(int32_t start,
3932                                        int32_t thisLength,
3933                                        const UnicodeString& srcText,
3934                                        int32_t srcStart,
3935                                        int32_t srcLength) const
3936 {
3937   if(srcText.isBogus()) {
3938     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3939   } else {
3940     srcText.pinIndices(srcStart, srcLength);
3941     return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3942   }
3943 }
3944
3945 inline int8_t
3946 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
3947 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3948
3949 inline int8_t
3950 UnicodeString::compareCodePointOrder(int32_t start,
3951                                      int32_t _length,
3952                                      const UnicodeString& srcText) const
3953 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3954
3955 inline int8_t
3956 UnicodeString::compareCodePointOrder(const UChar *srcChars,
3957                                      int32_t srcLength) const
3958 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3959
3960 inline int8_t
3961 UnicodeString::compareCodePointOrder(int32_t start,
3962                                      int32_t _length,
3963                                      const UnicodeString& srcText,
3964                                      int32_t srcStart,
3965                                      int32_t srcLength) const
3966 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3967
3968 inline int8_t
3969 UnicodeString::compareCodePointOrder(int32_t start,
3970                                      int32_t _length,
3971                                      const UChar *srcChars) const
3972 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3973
3974 inline int8_t
3975 UnicodeString::compareCodePointOrder(int32_t start,
3976                                      int32_t _length,
3977                                      const UChar *srcChars,
3978                                      int32_t srcStart,
3979                                      int32_t srcLength) const
3980 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3981
3982 inline int8_t
3983 UnicodeString::compareCodePointOrderBetween(int32_t start,
3984                                             int32_t limit,
3985                                             const UnicodeString& srcText,
3986                                             int32_t srcStart,
3987                                             int32_t srcLimit) const
3988 { return doCompareCodePointOrder(start, limit - start,
3989            srcText, srcStart, srcLimit - srcStart); }
3990
3991 inline int8_t
3992 UnicodeString::doCaseCompare(int32_t start,
3993                              int32_t thisLength,
3994                              const UnicodeString &srcText,
3995                              int32_t srcStart,
3996                              int32_t srcLength,
3997                              uint32_t options) const
3998 {
3999   if(srcText.isBogus()) {
4000     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4001   } else {
4002     srcText.pinIndices(srcStart, srcLength);
4003     return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4004   }
4005 }
4006
4007 inline int8_t
4008 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4009   return doCaseCompare(0, length(), text, 0, text.length(), options);
4010 }
4011
4012 inline int8_t
4013 UnicodeString::caseCompare(int32_t start,
4014                            int32_t _length,
4015                            const UnicodeString &srcText,
4016                            uint32_t options) const {
4017   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
4018 }
4019
4020 inline int8_t
4021 UnicodeString::caseCompare(const UChar *srcChars,
4022                            int32_t srcLength,
4023                            uint32_t options) const {
4024   return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4025 }
4026
4027 inline int8_t
4028 UnicodeString::caseCompare(int32_t start,
4029                            int32_t _length,
4030                            const UnicodeString &srcText,
4031                            int32_t srcStart,
4032                            int32_t srcLength,
4033                            uint32_t options) const {
4034   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4035 }
4036
4037 inline int8_t
4038 UnicodeString::caseCompare(int32_t start,
4039                            int32_t _length,
4040                            const UChar *srcChars,
4041                            uint32_t options) const {
4042   return doCaseCompare(start, _length, srcChars, 0, _length, options);
4043 }
4044
4045 inline int8_t
4046 UnicodeString::caseCompare(int32_t start,
4047                            int32_t _length,
4048                            const UChar *srcChars,
4049                            int32_t srcStart,
4050                            int32_t srcLength,
4051                            uint32_t options) const {
4052   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4053 }
4054
4055 inline int8_t
4056 UnicodeString::caseCompareBetween(int32_t start,
4057                                   int32_t limit,
4058                                   const UnicodeString &srcText,
4059                                   int32_t srcStart,
4060                                   int32_t srcLimit,
4061                                   uint32_t options) const {
4062   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4063 }
4064
4065 inline int32_t
4066 UnicodeString::indexOf(const UnicodeString& srcText,
4067                int32_t srcStart,
4068                int32_t srcLength,
4069                int32_t start,
4070                int32_t _length) const
4071 {
4072   if(!srcText.isBogus()) {
4073     srcText.pinIndices(srcStart, srcLength);
4074     if(srcLength > 0) {
4075       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4076     }
4077   }
4078   return -1;
4079 }
4080
4081 inline int32_t
4082 UnicodeString::indexOf(const UnicodeString& text) const
4083 { return indexOf(text, 0, text.length(), 0, length()); }
4084
4085 inline int32_t
4086 UnicodeString::indexOf(const UnicodeString& text,
4087                int32_t start) const {
4088   pinIndex(start);
4089   return indexOf(text, 0, text.length(), start, length() - start);
4090 }
4091
4092 inline int32_t
4093 UnicodeString::indexOf(const UnicodeString& text,
4094                int32_t start,
4095                int32_t _length) const
4096 { return indexOf(text, 0, text.length(), start, _length); }
4097
4098 inline int32_t
4099 UnicodeString::indexOf(const UChar *srcChars,
4100                int32_t srcLength,
4101                int32_t start) const {
4102   pinIndex(start);
4103   return indexOf(srcChars, 0, srcLength, start, length() - start);
4104 }
4105
4106 inline int32_t
4107 UnicodeString::indexOf(const UChar *srcChars,
4108                int32_t srcLength,
4109                int32_t start,
4110                int32_t _length) const
4111 { return indexOf(srcChars, 0, srcLength, start, _length); }
4112
4113 inline int32_t
4114 UnicodeString::indexOf(UChar c,
4115                int32_t start,
4116                int32_t _length) const
4117 { return doIndexOf(c, start, _length); }
4118
4119 inline int32_t
4120 UnicodeString::indexOf(UChar32 c,
4121                int32_t start,
4122                int32_t _length) const
4123 { return doIndexOf(c, start, _length); }
4124
4125 inline int32_t
4126 UnicodeString::indexOf(UChar c) const
4127 { return doIndexOf(c, 0, length()); }
4128
4129 inline int32_t
4130 UnicodeString::indexOf(UChar32 c) const
4131 { return indexOf(c, 0, length()); }
4132
4133 inline int32_t
4134 UnicodeString::indexOf(UChar c,
4135                int32_t start) const {
4136   pinIndex(start);
4137   return doIndexOf(c, start, length() - start);
4138 }
4139
4140 inline int32_t
4141 UnicodeString::indexOf(UChar32 c,
4142                int32_t start) const {
4143   pinIndex(start);
4144   return indexOf(c, start, length() - start);
4145 }
4146
4147 inline int32_t
4148 UnicodeString::lastIndexOf(const UChar *srcChars,
4149                int32_t srcLength,
4150                int32_t start,
4151                int32_t _length) const
4152 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4153
4154 inline int32_t
4155 UnicodeString::lastIndexOf(const UChar *srcChars,
4156                int32_t srcLength,
4157                int32_t start) const {
4158   pinIndex(start);
4159   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4160 }
4161
4162 inline int32_t
4163 UnicodeString::lastIndexOf(const UnicodeString& srcText,
4164                int32_t srcStart,
4165                int32_t srcLength,
4166                int32_t start,
4167                int32_t _length) const
4168 {
4169   if(!srcText.isBogus()) {
4170     srcText.pinIndices(srcStart, srcLength);
4171     if(srcLength > 0) {
4172       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4173     }
4174   }
4175   return -1;
4176 }
4177
4178 inline int32_t
4179 UnicodeString::lastIndexOf(const UnicodeString& text,
4180                int32_t start,
4181                int32_t _length) const
4182 { return lastIndexOf(text, 0, text.length(), start, _length); }
4183
4184 inline int32_t
4185 UnicodeString::lastIndexOf(const UnicodeString& text,
4186                int32_t start) const {
4187   pinIndex(start);
4188   return lastIndexOf(text, 0, text.length(), start, length() - start);
4189 }
4190
4191 inline int32_t
4192 UnicodeString::lastIndexOf(const UnicodeString& text) const
4193 { return lastIndexOf(text, 0, text.length(), 0, length()); }
4194
4195 inline int32_t
4196 UnicodeString::lastIndexOf(UChar c,
4197                int32_t start,
4198                int32_t _length) const
4199 { return doLastIndexOf(c, start, _length); }
4200
4201 inline int32_t
4202 UnicodeString::lastIndexOf(UChar32 c,
4203                int32_t start,
4204                int32_t _length) const {
4205   return doLastIndexOf(c, start, _length);
4206 }
4207
4208 inline int32_t
4209 UnicodeString::lastIndexOf(UChar c) const
4210 { return doLastIndexOf(c, 0, length()); }
4211
4212 inline int32_t
4213 UnicodeString::lastIndexOf(UChar32 c) const {
4214   return lastIndexOf(c, 0, length());
4215 }
4216
4217 inline int32_t
4218 UnicodeString::lastIndexOf(UChar c,
4219                int32_t start) const {
4220   pinIndex(start);
4221   return doLastIndexOf(c, start, length() - start);
4222 }
4223
4224 inline int32_t
4225 UnicodeString::lastIndexOf(UChar32 c,
4226                int32_t start) const {
4227   pinIndex(start);
4228   return lastIndexOf(c, start, length() - start);
4229 }
4230
4231 inline UBool
4232 UnicodeString::startsWith(const UnicodeString& text) const
4233 { return compare(0, text.length(), text, 0, text.length()) == 0; }
4234
4235 inline UBool
4236 UnicodeString::startsWith(const UnicodeString& srcText,
4237               int32_t srcStart,
4238               int32_t srcLength) const
4239 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
4240
4241 inline UBool
4242 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
4243   if(srcLength < 0) {
4244     srcLength = u_strlen(srcChars);
4245   }
4246   return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
4247 }
4248
4249 inline UBool
4250 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
4251   if(srcLength < 0) {
4252     srcLength = u_strlen(srcChars);
4253   }
4254   return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
4255 }
4256
4257 inline UBool
4258 UnicodeString::endsWith(const UnicodeString& text) const
4259 { return doCompare(length() - text.length(), text.length(),
4260            text, 0, text.length()) == 0; }
4261
4262 inline UBool
4263 UnicodeString::endsWith(const UnicodeString& srcText,
4264             int32_t srcStart,
4265             int32_t srcLength) const {
4266   srcText.pinIndices(srcStart, srcLength);
4267   return doCompare(length() - srcLength, srcLength,
4268                    srcText, srcStart, srcLength) == 0;
4269 }
4270
4271 inline UBool
4272 UnicodeString::endsWith(const UChar *srcChars,
4273             int32_t srcLength) const {
4274   if(srcLength < 0) {
4275     srcLength = u_strlen(srcChars);
4276   }
4277   return doCompare(length() - srcLength, srcLength,
4278                    srcChars, 0, srcLength) == 0;
4279 }
4280
4281 inline UBool
4282 UnicodeString::endsWith(const UChar *srcChars,
4283             int32_t srcStart,
4284             int32_t srcLength) const {
4285   if(srcLength < 0) {
4286     srcLength = u_strlen(srcChars + srcStart);
4287   }
4288   return doCompare(length() - srcLength, srcLength,
4289                    srcChars, srcStart, srcLength) == 0;
4290 }
4291
4292 //========================================
4293 // replace
4294 //========================================
4295 inline UnicodeString&
4296 UnicodeString::replace(int32_t start,
4297                int32_t _length,
4298                const UnicodeString& srcText)
4299 { return doReplace(start, _length, srcText, 0, srcText.length()); }
4300
4301 inline UnicodeString&
4302 UnicodeString::replace(int32_t start,
4303                int32_t _length,
4304                const UnicodeString& srcText,
4305                int32_t srcStart,
4306                int32_t srcLength)
4307 { return doReplace(start, _length, srcText, srcStart, srcLength); }
4308
4309 inline UnicodeString&
4310 UnicodeString::replace(int32_t start,
4311                int32_t _length,
4312                const UChar *srcChars,
4313                int32_t srcLength)
4314 { return doReplace(start, _length, srcChars, 0, srcLength); }
4315
4316 inline UnicodeString&
4317 UnicodeString::replace(int32_t start,
4318                int32_t _length,
4319                const UChar *srcChars,
4320                int32_t srcStart,
4321                int32_t srcLength)
4322 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4323
4324 inline UnicodeString&
4325 UnicodeString::replace(int32_t start,
4326                int32_t _length,
4327                UChar srcChar)
4328 { return doReplace(start, _length, &srcChar, 0, 1); }
4329
4330 inline UnicodeString&
4331 UnicodeString::replaceBetween(int32_t start,
4332                   int32_t limit,
4333                   const UnicodeString& srcText)
4334 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4335
4336 inline UnicodeString&
4337 UnicodeString::replaceBetween(int32_t start,
4338                   int32_t limit,
4339                   const UnicodeString& srcText,
4340                   int32_t srcStart,
4341                   int32_t srcLimit)
4342 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4343
4344 inline UnicodeString&
4345 UnicodeString::findAndReplace(const UnicodeString& oldText,
4346                   const UnicodeString& newText)
4347 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4348             newText, 0, newText.length()); }
4349
4350 inline UnicodeString&
4351 UnicodeString::findAndReplace(int32_t start,
4352                   int32_t _length,
4353                   const UnicodeString& oldText,
4354                   const UnicodeString& newText)
4355 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4356             newText, 0, newText.length()); }
4357
4358 // ============================
4359 // extract
4360 // ============================
4361 inline void
4362 UnicodeString::doExtract(int32_t start,
4363              int32_t _length,
4364              UnicodeString& target) const
4365 { target.replace(0, target.length(), *this, start, _length); }
4366
4367 inline void
4368 UnicodeString::extract(int32_t start,
4369                int32_t _length,
4370                UChar *target,
4371                int32_t targetStart) const
4372 { doExtract(start, _length, target, targetStart); }
4373
4374 inline void
4375 UnicodeString::extract(int32_t start,
4376                int32_t _length,
4377                UnicodeString& target) const
4378 { doExtract(start, _length, target); }
4379
4380 #if !UCONFIG_NO_CONVERSION
4381
4382 inline int32_t
4383 UnicodeString::extract(int32_t start,
4384                int32_t _length,
4385                char *dst,
4386                const char *codepage) const
4387
4388 {
4389   // This dstSize value will be checked explicitly
4390   return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4391 }
4392
4393 #endif
4394
4395 inline void
4396 UnicodeString::extractBetween(int32_t start,
4397                   int32_t limit,
4398                   UChar *dst,
4399                   int32_t dstStart) const {
4400   pinIndex(start);
4401   pinIndex(limit);
4402   doExtract(start, limit - start, dst, dstStart);
4403 }
4404
4405 inline UnicodeString
4406 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4407     return tempSubString(start, limit - start);
4408 }
4409
4410 inline UChar
4411 UnicodeString::doCharAt(int32_t offset) const
4412 {
4413   if((uint32_t)offset < (uint32_t)length()) {
4414     return getArrayStart()[offset];
4415   } else {
4416     return kInvalidUChar;
4417   }
4418 }
4419
4420 inline UChar
4421 UnicodeString::charAt(int32_t offset) const
4422 { return doCharAt(offset); }
4423
4424 inline UChar
4425 UnicodeString::operator[] (int32_t offset) const
4426 { return doCharAt(offset); }
4427
4428 inline UBool
4429 UnicodeString::isEmpty() const {
4430   // Arithmetic or logical right shift does not matter: only testing for 0.
4431   return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4432 }
4433
4434 //========================================
4435 // Write implementation methods
4436 //========================================
4437 inline void
4438 UnicodeString::setZeroLength() {
4439   fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4440 }
4441
4442 inline void
4443 UnicodeString::setShortLength(int32_t len) {
4444   // requires 0 <= len <= kMaxShortLength
4445   fUnion.fFields.fLengthAndFlags =
4446     (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4447 }
4448
4449 inline void
4450 UnicodeString::setLength(int32_t len) {
4451   if(len <= kMaxShortLength) {
4452     setShortLength(len);
4453   } else {
4454     fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4455     fUnion.fFields.fLength = len;
4456   }
4457 }
4458
4459 inline void
4460 UnicodeString::setToEmpty() {
4461   fUnion.fFields.fLengthAndFlags = kShortString;
4462 }
4463
4464 inline void
4465 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4466   setLength(len);
4467   fUnion.fFields.fArray = array;
4468   fUnion.fFields.fCapacity = capacity;
4469 }
4470
4471 inline UnicodeString&
4472 UnicodeString::operator= (UChar ch)
4473 { return doReplace(0, length(), &ch, 0, 1); }
4474
4475 inline UnicodeString&
4476 UnicodeString::operator= (UChar32 ch)
4477 { return replace(0, length(), ch); }
4478
4479 inline UnicodeString&
4480 UnicodeString::setTo(const UnicodeString& srcText,
4481              int32_t srcStart,
4482              int32_t srcLength)
4483 {
4484   unBogus();
4485   return doReplace(0, length(), srcText, srcStart, srcLength);
4486 }
4487
4488 inline UnicodeString&
4489 UnicodeString::setTo(const UnicodeString& srcText,
4490              int32_t srcStart)
4491 {
4492   unBogus();
4493   srcText.pinIndex(srcStart);
4494   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4495 }
4496
4497 inline UnicodeString&
4498 UnicodeString::setTo(const UnicodeString& srcText)
4499 {
4500   return copyFrom(srcText);
4501 }
4502
4503 inline UnicodeString&
4504 UnicodeString::setTo(const UChar *srcChars,
4505              int32_t srcLength)
4506 {
4507   unBogus();
4508   return doReplace(0, length(), srcChars, 0, srcLength);
4509 }
4510
4511 inline UnicodeString&
4512 UnicodeString::setTo(UChar srcChar)
4513 {
4514   unBogus();
4515   return doReplace(0, length(), &srcChar, 0, 1);
4516 }
4517
4518 inline UnicodeString&
4519 UnicodeString::setTo(UChar32 srcChar)
4520 {
4521   unBogus();
4522   return replace(0, length(), srcChar);
4523 }
4524
4525 inline UnicodeString&
4526 UnicodeString::append(const UnicodeString& srcText,
4527               int32_t srcStart,
4528               int32_t srcLength)
4529 { return doAppend(srcText, srcStart, srcLength); }
4530
4531 inline UnicodeString&
4532 UnicodeString::append(const UnicodeString& srcText)
4533 { return doAppend(srcText, 0, srcText.length()); }
4534
4535 inline UnicodeString&
4536 UnicodeString::append(const UChar *srcChars,
4537               int32_t srcStart,
4538               int32_t srcLength)
4539 { return doAppend(srcChars, srcStart, srcLength); }
4540
4541 inline UnicodeString&
4542 UnicodeString::append(const UChar *srcChars,
4543               int32_t srcLength)
4544 { return doAppend(srcChars, 0, srcLength); }
4545
4546 inline UnicodeString&
4547 UnicodeString::append(UChar srcChar)
4548 { return doAppend(&srcChar, 0, 1); }
4549
4550 inline UnicodeString&
4551 UnicodeString::operator+= (UChar ch)
4552 { return doAppend(&ch, 0, 1); }
4553
4554 inline UnicodeString&
4555 UnicodeString::operator+= (UChar32 ch) {
4556   return append(ch);
4557 }
4558
4559 inline UnicodeString&
4560 UnicodeString::operator+= (const UnicodeString& srcText)
4561 { return doAppend(srcText, 0, srcText.length()); }
4562
4563 inline UnicodeString&
4564 UnicodeString::insert(int32_t start,
4565               const UnicodeString& srcText,
4566               int32_t srcStart,
4567               int32_t srcLength)
4568 { return doReplace(start, 0, srcText, srcStart, srcLength); }
4569
4570 inline UnicodeString&
4571 UnicodeString::insert(int32_t start,
4572               const UnicodeString& srcText)
4573 { return doReplace(start, 0, srcText, 0, srcText.length()); }
4574
4575 inline UnicodeString&
4576 UnicodeString::insert(int32_t start,
4577               const UChar *srcChars,
4578               int32_t srcStart,
4579               int32_t srcLength)
4580 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
4581
4582 inline UnicodeString&
4583 UnicodeString::insert(int32_t start,
4584               const UChar *srcChars,
4585               int32_t srcLength)
4586 { return doReplace(start, 0, srcChars, 0, srcLength); }
4587
4588 inline UnicodeString&
4589 UnicodeString::insert(int32_t start,
4590               UChar srcChar)
4591 { return doReplace(start, 0, &srcChar, 0, 1); }
4592
4593 inline UnicodeString&
4594 UnicodeString::insert(int32_t start,
4595               UChar32 srcChar)
4596 { return replace(start, 0, srcChar); }
4597
4598
4599 inline UnicodeString&
4600 UnicodeString::remove()
4601 {
4602   // remove() of a bogus string makes the string empty and non-bogus
4603   if(isBogus()) {
4604     setToEmpty();
4605   } else {
4606     setZeroLength();
4607   }
4608   return *this;
4609 }
4610
4611 inline UnicodeString&
4612 UnicodeString::remove(int32_t start,
4613              int32_t _length)
4614 {
4615     if(start <= 0 && _length == INT32_MAX) {
4616         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4617         return remove();
4618     }
4619     return doReplace(start, _length, NULL, 0, 0);
4620 }
4621
4622 inline UnicodeString&
4623 UnicodeString::removeBetween(int32_t start,
4624                 int32_t limit)
4625 { return doReplace(start, limit - start, NULL, 0, 0); }
4626
4627 inline UnicodeString &
4628 UnicodeString::retainBetween(int32_t start, int32_t limit) {
4629   truncate(limit);
4630   return doReplace(0, start, NULL, 0, 0);
4631 }
4632
4633 inline UBool
4634 UnicodeString::truncate(int32_t targetLength)
4635 {
4636   if(isBogus() && targetLength == 0) {
4637     // truncate(0) of a bogus string makes the string empty and non-bogus
4638     unBogus();
4639     return FALSE;
4640   } else if((uint32_t)targetLength < (uint32_t)length()) {
4641     setLength(targetLength);
4642     return TRUE;
4643   } else {
4644     return FALSE;
4645   }
4646 }
4647
4648 inline UnicodeString&
4649 UnicodeString::reverse()
4650 { return doReverse(0, length()); }
4651
4652 inline UnicodeString&
4653 UnicodeString::reverse(int32_t start,
4654                int32_t _length)
4655 { return doReverse(start, _length); }
4656
4657 U_NAMESPACE_END
4658
4659 #endif