git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/common/unicode/unistr.h

... / ...

Commit	Line	Data
	1	/*
	2	**********************************************************************
	3	* Copyright (C) 1998-2013, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	*
	7	* File unistr.h
	8	*
	9	* Modification History:
	10	*
	11	* Date Name Description
	12	* 09/25/98 stephen Creation.
	13	* 11/11/98 stephen Changed per 11/9 code review.
	14	* 04/20/99 stephen Overhauled per 4/16 code review.
	15	* 11/18/99 aliu Made to inherit from Replaceable. Added method
	16	* handleReplaceBetween(); other methods unchanged.
	17	* 06/25/01 grhoten Remove dependency on iostream.
	18	******************************************************************************
	19	*/
	20
	21	#ifndef UNISTR_H
	22	#define UNISTR_H
	23
	24	/**
	25	* \file
	26	* \brief C++ API: Unicode String
	27	*/
	28
	29	#include "unicode/utypes.h"
	30	#include "unicode/rep.h"
	31	#include "unicode/std_string.h"
	32	#include "unicode/stringpiece.h"
	33	#include "unicode/bytestream.h"
	34	#include "unicode/ucasemap.h"
	35
	36	struct UConverter; // unicode/ucnv.h
	37	class StringThreadTest;
	38
	39	#ifndef U_COMPARE_CODE_POINT_ORDER
	40	/* see also ustring.h and unorm.h */
	41	/**
	42	* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
	43	* Compare strings in code point order instead of code unit order.
	44	* @stable ICU 2.2
	45	*/
	46	#define U_COMPARE_CODE_POINT_ORDER 0x8000
	47	#endif
	48
	49	#ifndef USTRING_H
	50	/**
	51	* \ingroup ustring_ustrlen
	52	*/
	53	U_STABLE int32_t U_EXPORT2
	54	u_strlen(const UChar *s);
	55	#endif
	56
	57	#ifndef U_HIDE_INTERNAL_API
	58	/**
	59	* \def U_STRING_CASE_MAPPER_DEFINED
	60	* @internal
	61	*/
	62
	63	#ifndef U_STRING_CASE_MAPPER_DEFINED
	64	#define U_STRING_CASE_MAPPER_DEFINED
	65
	66	/**
	67	* Internal string case mapping function type.
	68	* @internal
	69	*/
	70	typedef int32_t U_CALLCONV
	71	UStringCaseMapper(const UCaseMap *csm,
	72	UChar *dest, int32_t destCapacity,
	73	const UChar *src, int32_t srcLength,
	74	UErrorCode *pErrorCode);
	75
	76	#endif
	77	#endif /* U_HIDE_INTERNAL_API */
	78
	79	U_NAMESPACE_BEGIN
	80
	81	class BreakIterator; // unicode/brkiter.h
	82	class Locale; // unicode/locid.h
	83	class StringCharacterIterator;
	84	class UnicodeStringAppendable; // unicode/appendable.h
	85
	86	/* The <iostream> include has been moved to unicode/ustream.h */
	87
	88	/**
	89	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
	90	* which constructs a Unicode string from an invariant-character char * string.
	91	* About invariant characters see utypes.h.
	92	* This constructor has no runtime dependency on conversion code and is
	93	* therefore recommended over ones taking a charset name string
	94	* (where the empty string "" indicates invariant-character conversion).
	95	*
	96	* @stable ICU 3.2
	97	*/
	98	#define US_INV icu::UnicodeString::kInvariant
	99
	100	/**
	101	* Unicode String literals in C++.
	102	* Dependent on the platform properties, different UnicodeString
	103	* constructors should be used to create a UnicodeString object from
	104	* a string literal.
	105	* The macros are defined for maximum performance.
	106	* They work only for strings that contain "invariant characters", i.e.,
	107	* only latin letters, digits, and some punctuation.
	108	* See utypes.h for details.
	109	*
	110	* The string parameter must be a C string literal.
	111	* The length of the string, not including the terminating
	112	* <code>NUL</code>, must be specified as a constant.
	113	* The U_STRING_DECL macro should be invoked exactly once for one
	114	* such string variable before it is used.
	115	* @stable ICU 2.0
	116	*/
	117	#if defined(U_DECLARE_UTF16)
	118	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
	119	#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
	120	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
	121	#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
	122	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
	123	#else
	124	# define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
	125	#endif
	126
	127	/**
	128	* Unicode String literals in C++.
	129	* Dependent on the platform properties, different UnicodeString
	130	* constructors should be used to create a UnicodeString object from
	131	* a string literal.
	132	* The macros are defined for improved performance.
	133	* They work only for strings that contain "invariant characters", i.e.,
	134	* only latin letters, digits, and some punctuation.
	135	* See utypes.h for details.
	136	*
	137	* The string parameter must be a C string literal.
	138	* @stable ICU 2.0
	139	*/
	140	#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
	141
	142	/**
	143	* \def UNISTR_FROM_CHAR_EXPLICIT
	144	* This can be defined to be empty or "explicit".
	145	* If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
	146	* constructors are marked as explicit, preventing their inadvertent use.
	147	* @stable ICU 49
	148	*/
	149	#ifndef UNISTR_FROM_CHAR_EXPLICIT
	150	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
	151	// Auto-"explicit" in ICU library code.
	152	# define UNISTR_FROM_CHAR_EXPLICIT explicit
	153	# else
	154	// Empty by default for source code compatibility.
	155	# define UNISTR_FROM_CHAR_EXPLICIT
	156	# endif
	157	#endif
	158
	159	/**
	160	* \def UNISTR_FROM_STRING_EXPLICIT
	161	* This can be defined to be empty or "explicit".
	162	* If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
	163	* constructors are marked as explicit, preventing their inadvertent use.
	164	*
	165	* In particular, this helps prevent accidentally depending on ICU conversion code
	166	* by passing a string literal into an API with a const UnicodeString & parameter.
	167	* @stable ICU 49
	168	*/
	169	#ifndef UNISTR_FROM_STRING_EXPLICIT
	170	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
	171	// Auto-"explicit" in ICU library code.
	172	# define UNISTR_FROM_STRING_EXPLICIT explicit
	173	# else
	174	// Empty by default for source code compatibility.
	175	# define UNISTR_FROM_STRING_EXPLICIT
	176	# endif
	177	#endif
	178
	179	/**
	180	* UnicodeString is a string class that stores Unicode characters directly and provides
	181	* similar functionality as the Java String and StringBuffer classes.
	182	* It is a concrete implementation of the abstract class Replaceable (for transliteration).
	183	*
	184	* The UnicodeString class is not suitable for subclassing.
	185	*
	186	* <p>For an overview of Unicode strings in C and C++ see the
	187	* <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
	188	*
	189	* <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
	190	* A Unicode character may be stored with either one code unit
	191	* (the most common case) or with a matched pair of special code units
	192	* ("surrogates"). The data type for code units is UChar.
	193	* For single-character handling, a Unicode character code <em>point</em> is a value
	194	* in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
	195	*
	196	* <p>Indexes and offsets into and lengths of strings always count code units, not code points.
	197	* This is the same as with multi-byte char* strings in traditional string handling.
	198	* Operations on partial strings typically do not test for code point boundaries.
	199	* If necessary, the user needs to take care of such boundaries by testing for the code unit
	200	* values or by using functions like
	201	* UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
	202	* (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
	203	*
	204	* UnicodeString methods are more lenient with regard to input parameter values
	205	* than other ICU APIs. In particular:
	206	* - If indexes are out of bounds for a UnicodeString object
	207	* (<0 or >length()) then they are "pinned" to the nearest boundary.
	208	* - If primitive string pointer values (e.g., const UChar * or char *)
	209	* for input strings are NULL, then those input string parameters are treated
	210	* as if they pointed to an empty string.
	211	* However, this is <em>not</em> the case for char * parameters for charset names
	212	* or other IDs.
	213	* - Most UnicodeString methods do not take a UErrorCode parameter because
	214	* there are usually very few opportunities for failure other than a shortage
	215	* of memory, error codes in low-level C++ string methods would be inconvenient,
	216	* and the error code as the last parameter (ICU convention) would prevent
	217	* the use of default parameter values.
	218	* Instead, such methods set the UnicodeString into a "bogus" state
	219	* (see isBogus()) if an error occurs.
	220	*
	221	* In string comparisons, two UnicodeString objects that are both "bogus"
	222	* compare equal (to be transitive and prevent endless loops in sorting),
	223	* and a "bogus" string compares less than any non-"bogus" one.
	224	*
	225	* Const UnicodeString methods are thread-safe. Multiple threads can use
	226	* const methods on the same UnicodeString object simultaneously,
	227	* but non-const methods must not be called concurrently (in multiple threads)
	228	* with any other (const or non-const) methods.
	229	*
	230	* Similarly, const UnicodeString & parameters are thread-safe.
	231	* One object may be passed in as such a parameter concurrently in multiple threads.
	232	* This includes the const UnicodeString & parameters for
	233	* copy construction, assignment, and cloning.
	234	*
	235	* <p>UnicodeString uses several storage methods.
	236	* String contents can be stored inside the UnicodeString object itself,
	237	* in an allocated and shared buffer, or in an outside buffer that is "aliased".
	238	* Most of this is done transparently, but careful aliasing in particular provides
	239	* significant performance improvements.
	240	* Also, the internal buffer is accessible via special functions.
	241	* For details see the
	242	* <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
	243	*
	244	* @see utf.h
	245	* @see CharacterIterator
	246	* @stable ICU 2.0
	247	*/
	248	class U_COMMON_API UnicodeString : public Replaceable
	249	{
	250	public:
	251
	252	/**
	253	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
	254	* which constructs a Unicode string from an invariant-character char * string.
	255	* Use the macro US_INV instead of the full qualification for this value.
	256	*
	257	* @see US_INV
	258	* @stable ICU 3.2
	259	*/
	260	enum EInvariant {
	261	/**
	262	* @see EInvariant
	263	* @stable ICU 3.2
	264	*/
	265	kInvariant
	266	};
	267
	268	//========================================
	269	// Read-only operations
	270	//========================================
	271
	272	/* Comparison - bitwise only - for international comparison use collation */
	273
	274	/**
	275	* Equality operator. Performs only bitwise comparison.
	276	* @param text The UnicodeString to compare to this one.
	277	* @return TRUE if <TT>text</TT> contains the same characters as this one,
	278	* FALSE otherwise.
	279	* @stable ICU 2.0
	280	*/
	281	inline UBool operator== (const UnicodeString& text) const;
	282
	283	/**
	284	* Inequality operator. Performs only bitwise comparison.
	285	* @param text The UnicodeString to compare to this one.
	286	* @return FALSE if <TT>text</TT> contains the same characters as this one,
	287	* TRUE otherwise.
	288	* @stable ICU 2.0
	289	*/
	290	inline UBool operator!= (const UnicodeString& text) const;
	291
	292	/**
	293	* Greater than operator. Performs only bitwise comparison.
	294	* @param text The UnicodeString to compare to this one.
	295	* @return TRUE if the characters in this are bitwise
	296	* greater than the characters in <code>text</code>, FALSE otherwise
	297	* @stable ICU 2.0
	298	*/
	299	inline UBool operator> (const UnicodeString& text) const;
	300
	301	/**
	302	* Less than operator. Performs only bitwise comparison.
	303	* @param text The UnicodeString to compare to this one.
	304	* @return TRUE if the characters in this are bitwise
	305	* less than the characters in <code>text</code>, FALSE otherwise
	306	* @stable ICU 2.0
	307	*/
	308	inline UBool operator< (const UnicodeString& text) const;
	309
	310	/**
	311	* Greater than or equal operator. Performs only bitwise comparison.
	312	* @param text The UnicodeString to compare to this one.
	313	* @return TRUE if the characters in this are bitwise
	314	* greater than or equal to the characters in <code>text</code>, FALSE otherwise
	315	* @stable ICU 2.0
	316	*/
	317	inline UBool operator>= (const UnicodeString& text) const;
	318
	319	/**
	320	* Less than or equal operator. Performs only bitwise comparison.
	321	* @param text The UnicodeString to compare to this one.
	322	* @return TRUE if the characters in this are bitwise
	323	* less than or equal to the characters in <code>text</code>, FALSE otherwise
	324	* @stable ICU 2.0
	325	*/
	326	inline UBool operator<= (const UnicodeString& text) const;
	327
	328	/**
	329	* Compare the characters bitwise in this UnicodeString to
	330	* the characters in <code>text</code>.
	331	* @param text The UnicodeString to compare to this one.
	332	* @return The result of bitwise character comparison: 0 if this
	333	* contains the same characters as <code>text</code>, -1 if the characters in
	334	* this are bitwise less than the characters in <code>text</code>, +1 if the
	335	* characters in this are bitwise greater than the characters
	336	* in <code>text</code>.
	337	* @stable ICU 2.0
	338	*/
	339	inline int8_t compare(const UnicodeString& text) const;
	340
	341	/**
	342	* Compare the characters bitwise in the range
	343	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
	344	* in <TT>text</TT>
	345	* @param start the offset at which the compare operation begins
	346	* @param length the number of characters of text to compare.
	347	* @param text the other text to be compared against this string.
	348	* @return The result of bitwise character comparison: 0 if this
	349	* contains the same characters as <code>text</code>, -1 if the characters in
	350	* this are bitwise less than the characters in <code>text</code>, +1 if the
	351	* characters in this are bitwise greater than the characters
	352	* in <code>text</code>.
	353	* @stable ICU 2.0
	354	*/
	355	inline int8_t compare(int32_t start,
	356	int32_t length,
	357	const UnicodeString& text) const;
	358
	359	/**
	360	* Compare the characters bitwise in the range
	361	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
	362	* in <TT>srcText</TT> in the range
	363	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	364	* @param start the offset at which the compare operation begins
	365	* @param length the number of characters in this to compare.
	366	* @param srcText the text to be compared
	367	* @param srcStart the offset into <TT>srcText</TT> to start comparison
	368	* @param srcLength the number of characters in <TT>srcText</TT> to compare
	369	* @return The result of bitwise character comparison: 0 if this
	370	* contains the same characters as <code>srcText</code>, -1 if the characters in
	371	* this are bitwise less than the characters in <code>srcText</code>, +1 if the
	372	* characters in this are bitwise greater than the characters
	373	* in <code>srcText</code>.
	374	* @stable ICU 2.0
	375	*/
	376	inline int8_t compare(int32_t start,
	377	int32_t length,
	378	const UnicodeString& srcText,
	379	int32_t srcStart,
	380	int32_t srcLength) const;
	381
	382	/**
	383	* Compare the characters bitwise in this UnicodeString with the first
	384	* <TT>srcLength</TT> characters in <TT>srcChars</TT>.
	385	* @param srcChars The characters to compare to this UnicodeString.
	386	* @param srcLength the number of characters in <TT>srcChars</TT> to compare
	387	* @return The result of bitwise character comparison: 0 if this
	388	* contains the same characters as <code>srcChars</code>, -1 if the characters in
	389	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
	390	* characters in this are bitwise greater than the characters
	391	* in <code>srcChars</code>.
	392	* @stable ICU 2.0
	393	*/
	394	inline int8_t compare(const UChar *srcChars,
	395	int32_t srcLength) const;
	396
	397	/**
	398	* Compare the characters bitwise in the range
	399	* [<TT>start</TT>, <TT>start + length</TT>) with the first
	400	* <TT>length</TT> characters in <TT>srcChars</TT>
	401	* @param start the offset at which the compare operation begins
	402	* @param length the number of characters to compare.
	403	* @param srcChars the characters to be compared
	404	* @return The result of bitwise character comparison: 0 if this
	405	* contains the same characters as <code>srcChars</code>, -1 if the characters in
	406	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
	407	* characters in this are bitwise greater than the characters
	408	* in <code>srcChars</code>.
	409	* @stable ICU 2.0
	410	*/
	411	inline int8_t compare(int32_t start,
	412	int32_t length,
	413	const UChar *srcChars) const;
	414
	415	/**
	416	* Compare the characters bitwise in the range
	417	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
	418	* in <TT>srcChars</TT> in the range
	419	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	420	* @param start the offset at which the compare operation begins
	421	* @param length the number of characters in this to compare
	422	* @param srcChars the characters to be compared
	423	* @param srcStart the offset into <TT>srcChars</TT> to start comparison
	424	* @param srcLength the number of characters in <TT>srcChars</TT> to compare
	425	* @return The result of bitwise character comparison: 0 if this
	426	* contains the same characters as <code>srcChars</code>, -1 if the characters in
	427	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
	428	* characters in this are bitwise greater than the characters
	429	* in <code>srcChars</code>.
	430	* @stable ICU 2.0
	431	*/
	432	inline int8_t compare(int32_t start,
	433	int32_t length,
	434	const UChar *srcChars,
	435	int32_t srcStart,
	436	int32_t srcLength) const;
	437
	438	/**
	439	* Compare the characters bitwise in the range
	440	* [<TT>start</TT>, <TT>limit</TT>) with the characters
	441	* in <TT>srcText</TT> in the range
	442	* [<TT>srcStart</TT>, <TT>srcLimit</TT>).
	443	* @param start the offset at which the compare operation begins
	444	* @param limit the offset immediately following the compare operation
	445	* @param srcText the text to be compared
	446	* @param srcStart the offset into <TT>srcText</TT> to start comparison
	447	* @param srcLimit the offset into <TT>srcText</TT> to limit comparison
	448	* @return The result of bitwise character comparison: 0 if this
	449	* contains the same characters as <code>srcText</code>, -1 if the characters in
	450	* this are bitwise less than the characters in <code>srcText</code>, +1 if the
	451	* characters in this are bitwise greater than the characters
	452	* in <code>srcText</code>.
	453	* @stable ICU 2.0
	454	*/
	455	inline int8_t compareBetween(int32_t start,
	456	int32_t limit,
	457	const UnicodeString& srcText,
	458	int32_t srcStart,
	459	int32_t srcLimit) const;
	460
	461	/**
	462	* Compare two Unicode strings in code point order.
	463	* The result may be different from the results of compare(), operator<, etc.
	464	* if supplementary characters are present:
	465	*
	466	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	467	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	468	* which means that they compare as less than some other BMP characters like U+feff.
	469	* This function compares Unicode strings in code point order.
	470	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	471	*
	472	* @param text Another string to compare this one to.
	473	* @return a negative/zero/positive integer corresponding to whether
	474	* this string is less than/equal to/greater than the second one
	475	* in code point order
	476	* @stable ICU 2.0
	477	*/
	478	inline int8_t compareCodePointOrder(const UnicodeString& text) const;
	479
	480	/**
	481	* Compare two Unicode strings in code point order.
	482	* The result may be different from the results of compare(), operator<, etc.
	483	* if supplementary characters are present:
	484	*
	485	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	486	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	487	* which means that they compare as less than some other BMP characters like U+feff.
	488	* This function compares Unicode strings in code point order.
	489	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	490	*
	491	* @param start The start offset in this string at which the compare operation begins.
	492	* @param length The number of code units from this string to compare.
	493	* @param srcText Another string to compare this one to.
	494	* @return a negative/zero/positive integer corresponding to whether
	495	* this string is less than/equal to/greater than the second one
	496	* in code point order
	497	* @stable ICU 2.0
	498	*/
	499	inline int8_t compareCodePointOrder(int32_t start,
	500	int32_t length,
	501	const UnicodeString& srcText) const;
	502
	503	/**
	504	* Compare two Unicode strings in code point order.
	505	* The result may be different from the results of compare(), operator<, etc.
	506	* if supplementary characters are present:
	507	*
	508	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	509	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	510	* which means that they compare as less than some other BMP characters like U+feff.
	511	* This function compares Unicode strings in code point order.
	512	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	513	*
	514	* @param start The start offset in this string at which the compare operation begins.
	515	* @param length The number of code units from this string to compare.
	516	* @param srcText Another string to compare this one to.
	517	* @param srcStart The start offset in that string at which the compare operation begins.
	518	* @param srcLength The number of code units from that string to compare.
	519	* @return a negative/zero/positive integer corresponding to whether
	520	* this string is less than/equal to/greater than the second one
	521	* in code point order
	522	* @stable ICU 2.0
	523	*/
	524	inline int8_t compareCodePointOrder(int32_t start,
	525	int32_t length,
	526	const UnicodeString& srcText,
	527	int32_t srcStart,
	528	int32_t srcLength) const;
	529
	530	/**
	531	* Compare two Unicode strings in code point order.
	532	* The result may be different from the results of compare(), operator<, etc.
	533	* if supplementary characters are present:
	534	*
	535	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	536	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	537	* which means that they compare as less than some other BMP characters like U+feff.
	538	* This function compares Unicode strings in code point order.
	539	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	540	*
	541	* @param srcChars A pointer to another string to compare this one to.
	542	* @param srcLength The number of code units from that string to compare.
	543	* @return a negative/zero/positive integer corresponding to whether
	544	* this string is less than/equal to/greater than the second one
	545	* in code point order
	546	* @stable ICU 2.0
	547	*/
	548	inline int8_t compareCodePointOrder(const UChar *srcChars,
	549	int32_t srcLength) const;
	550
	551	/**
	552	* Compare two Unicode strings in code point order.
	553	* The result may be different from the results of compare(), operator<, etc.
	554	* if supplementary characters are present:
	555	*
	556	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	557	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	558	* which means that they compare as less than some other BMP characters like U+feff.
	559	* This function compares Unicode strings in code point order.
	560	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	561	*
	562	* @param start The start offset in this string at which the compare operation begins.
	563	* @param length The number of code units from this string to compare.
	564	* @param srcChars A pointer to another string to compare this one to.
	565	* @return a negative/zero/positive integer corresponding to whether
	566	* this string is less than/equal to/greater than the second one
	567	* in code point order
	568	* @stable ICU 2.0
	569	*/
	570	inline int8_t compareCodePointOrder(int32_t start,
	571	int32_t length,
	572	const UChar *srcChars) const;
	573
	574	/**
	575	* Compare two Unicode strings in code point order.
	576	* The result may be different from the results of compare(), operator<, etc.
	577	* if supplementary characters are present:
	578	*
	579	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	580	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	581	* which means that they compare as less than some other BMP characters like U+feff.
	582	* This function compares Unicode strings in code point order.
	583	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	584	*
	585	* @param start The start offset in this string at which the compare operation begins.
	586	* @param length The number of code units from this string to compare.
	587	* @param srcChars A pointer to another string to compare this one to.
	588	* @param srcStart The start offset in that string at which the compare operation begins.
	589	* @param srcLength The number of code units from that string to compare.
	590	* @return a negative/zero/positive integer corresponding to whether
	591	* this string is less than/equal to/greater than the second one
	592	* in code point order
	593	* @stable ICU 2.0
	594	*/
	595	inline int8_t compareCodePointOrder(int32_t start,
	596	int32_t length,
	597	const UChar *srcChars,
	598	int32_t srcStart,
	599	int32_t srcLength) const;
	600
	601	/**
	602	* Compare two Unicode strings in code point order.
	603	* The result may be different from the results of compare(), operator<, etc.
	604	* if supplementary characters are present:
	605	*
	606	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	607	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	608	* which means that they compare as less than some other BMP characters like U+feff.
	609	* This function compares Unicode strings in code point order.
	610	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	611	*
	612	* @param start The start offset in this string at which the compare operation begins.
	613	* @param limit The offset after the last code unit from this string to compare.
	614	* @param srcText Another string to compare this one to.
	615	* @param srcStart The start offset in that string at which the compare operation begins.
	616	* @param srcLimit The offset after the last code unit from that string to compare.
	617	* @return a negative/zero/positive integer corresponding to whether
	618	* this string is less than/equal to/greater than the second one
	619	* in code point order
	620	* @stable ICU 2.0
	621	*/
	622	inline int8_t compareCodePointOrderBetween(int32_t start,
	623	int32_t limit,
	624	const UnicodeString& srcText,
	625	int32_t srcStart,
	626	int32_t srcLimit) const;
	627
	628	/**
	629	* Compare two strings case-insensitively using full case folding.
	630	* This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
	631	*
	632	* @param text Another string to compare this one to.
	633	* @param options A bit set of options:
	634	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	635	* Comparison in code unit order with default case folding.
	636	*
	637	* - U_COMPARE_CODE_POINT_ORDER
	638	* Set to choose code point order instead of code unit order
	639	* (see u_strCompare for details).
	640	*
	641	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	642	*
	643	* @return A negative, zero, or positive integer indicating the comparison result.
	644	* @stable ICU 2.0
	645	*/
	646	inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
	647
	648	/**
	649	* Compare two strings case-insensitively using full case folding.
	650	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
	651	*
	652	* @param start The start offset in this string at which the compare operation begins.
	653	* @param length The number of code units from this string to compare.
	654	* @param srcText Another string to compare this one to.
	655	* @param options A bit set of options:
	656	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	657	* Comparison in code unit order with default case folding.
	658	*
	659	* - U_COMPARE_CODE_POINT_ORDER
	660	* Set to choose code point order instead of code unit order
	661	* (see u_strCompare for details).
	662	*
	663	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	664	*
	665	* @return A negative, zero, or positive integer indicating the comparison result.
	666	* @stable ICU 2.0
	667	*/
	668	inline int8_t caseCompare(int32_t start,
	669	int32_t length,
	670	const UnicodeString& srcText,
	671	uint32_t options) const;
	672
	673	/**
	674	* Compare two strings case-insensitively using full case folding.
	675	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
	676	*
	677	* @param start The start offset in this string at which the compare operation begins.
	678	* @param length The number of code units from this string to compare.
	679	* @param srcText Another string to compare this one to.
	680	* @param srcStart The start offset in that string at which the compare operation begins.
	681	* @param srcLength The number of code units from that string to compare.
	682	* @param options A bit set of options:
	683	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	684	* Comparison in code unit order with default case folding.
	685	*
	686	* - U_COMPARE_CODE_POINT_ORDER
	687	* Set to choose code point order instead of code unit order
	688	* (see u_strCompare for details).
	689	*
	690	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	691	*
	692	* @return A negative, zero, or positive integer indicating the comparison result.
	693	* @stable ICU 2.0
	694	*/
	695	inline int8_t caseCompare(int32_t start,
	696	int32_t length,
	697	const UnicodeString& srcText,
	698	int32_t srcStart,
	699	int32_t srcLength,
	700	uint32_t options) const;
	701
	702	/**
	703	* Compare two strings case-insensitively using full case folding.
	704	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
	705	*
	706	* @param srcChars A pointer to another string to compare this one to.
	707	* @param srcLength The number of code units from that string to compare.
	708	* @param options A bit set of options:
	709	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	710	* Comparison in code unit order with default case folding.
	711	*
	712	* - U_COMPARE_CODE_POINT_ORDER
	713	* Set to choose code point order instead of code unit order
	714	* (see u_strCompare for details).
	715	*
	716	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	717	*
	718	* @return A negative, zero, or positive integer indicating the comparison result.
	719	* @stable ICU 2.0
	720	*/
	721	inline int8_t caseCompare(const UChar *srcChars,
	722	int32_t srcLength,
	723	uint32_t options) const;
	724
	725	/**
	726	* Compare two strings case-insensitively using full case folding.
	727	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
	728	*
	729	* @param start The start offset in this string at which the compare operation begins.
	730	* @param length The number of code units from this string to compare.
	731	* @param srcChars A pointer to another string to compare this one to.
	732	* @param options A bit set of options:
	733	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	734	* Comparison in code unit order with default case folding.
	735	*
	736	* - U_COMPARE_CODE_POINT_ORDER
	737	* Set to choose code point order instead of code unit order
	738	* (see u_strCompare for details).
	739	*
	740	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	741	*
	742	* @return A negative, zero, or positive integer indicating the comparison result.
	743	* @stable ICU 2.0
	744	*/
	745	inline int8_t caseCompare(int32_t start,
	746	int32_t length,
	747	const UChar *srcChars,
	748	uint32_t options) const;
	749
	750	/**
	751	* Compare two strings case-insensitively using full case folding.
	752	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
	753	*
	754	* @param start The start offset in this string at which the compare operation begins.
	755	* @param length The number of code units from this string to compare.
	756	* @param srcChars A pointer to another string to compare this one to.
	757	* @param srcStart The start offset in that string at which the compare operation begins.
	758	* @param srcLength The number of code units from that string to compare.
	759	* @param options A bit set of options:
	760	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	761	* Comparison in code unit order with default case folding.
	762	*
	763	* - U_COMPARE_CODE_POINT_ORDER
	764	* Set to choose code point order instead of code unit order
	765	* (see u_strCompare for details).
	766	*
	767	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	768	*
	769	* @return A negative, zero, or positive integer indicating the comparison result.
	770	* @stable ICU 2.0
	771	*/
	772	inline int8_t caseCompare(int32_t start,
	773	int32_t length,
	774	const UChar *srcChars,
	775	int32_t srcStart,
	776	int32_t srcLength,
	777	uint32_t options) const;
	778
	779	/**
	780	* Compare two strings case-insensitively using full case folding.
	781	* This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
	782	*
	783	* @param start The start offset in this string at which the compare operation begins.
	784	* @param limit The offset after the last code unit from this string to compare.
	785	* @param srcText Another string to compare this one to.
	786	* @param srcStart The start offset in that string at which the compare operation begins.
	787	* @param srcLimit The offset after the last code unit from that string to compare.
	788	* @param options A bit set of options:
	789	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	790	* Comparison in code unit order with default case folding.
	791	*
	792	* - U_COMPARE_CODE_POINT_ORDER
	793	* Set to choose code point order instead of code unit order
	794	* (see u_strCompare for details).
	795	*
	796	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	797	*
	798	* @return A negative, zero, or positive integer indicating the comparison result.
	799	* @stable ICU 2.0
	800	*/
	801	inline int8_t caseCompareBetween(int32_t start,
	802	int32_t limit,
	803	const UnicodeString& srcText,
	804	int32_t srcStart,
	805	int32_t srcLimit,
	806	uint32_t options) const;
	807
	808	/**
	809	* Determine if this starts with the characters in <TT>text</TT>
	810	* @param text The text to match.
	811	* @return TRUE if this starts with the characters in <TT>text</TT>,
	812	* FALSE otherwise
	813	* @stable ICU 2.0
	814	*/
	815	inline UBool startsWith(const UnicodeString& text) const;
	816
	817	/**
	818	* Determine if this starts with the characters in <TT>srcText</TT>
	819	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	820	* @param srcText The text to match.
	821	* @param srcStart the offset into <TT>srcText</TT> to start matching
	822	* @param srcLength the number of characters in <TT>srcText</TT> to match
	823	* @return TRUE if this starts with the characters in <TT>srcText</TT>,
	824	* FALSE otherwise
	825	* @stable ICU 2.0
	826	*/
	827	inline UBool startsWith(const UnicodeString& srcText,
	828	int32_t srcStart,
	829	int32_t srcLength) const;
	830
	831	/**
	832	* Determine if this starts with the characters in <TT>srcChars</TT>
	833	* @param srcChars The characters to match.
	834	* @param srcLength the number of characters in <TT>srcChars</TT>
	835	* @return TRUE if this starts with the characters in <TT>srcChars</TT>,
	836	* FALSE otherwise
	837	* @stable ICU 2.0
	838	*/
	839	inline UBool startsWith(const UChar *srcChars,
	840	int32_t srcLength) const;
	841
	842	/**
	843	* Determine if this starts with the characters in <TT>srcChars</TT>
	844	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	845	* @param srcChars The characters to match.
	846	* @param srcStart the offset into <TT>srcChars</TT> to start matching
	847	* @param srcLength the number of characters in <TT>srcChars</TT> to match
	848	* @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
	849	* @stable ICU 2.0
	850	*/
	851	inline UBool startsWith(const UChar *srcChars,
	852	int32_t srcStart,
	853	int32_t srcLength) const;
	854
	855	/**
	856	* Determine if this ends with the characters in <TT>text</TT>
	857	* @param text The text to match.
	858	* @return TRUE if this ends with the characters in <TT>text</TT>,
	859	* FALSE otherwise
	860	* @stable ICU 2.0
	861	*/
	862	inline UBool endsWith(const UnicodeString& text) const;
	863
	864	/**
	865	* Determine if this ends with the characters in <TT>srcText</TT>
	866	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	867	* @param srcText The text to match.
	868	* @param srcStart the offset into <TT>srcText</TT> to start matching
	869	* @param srcLength the number of characters in <TT>srcText</TT> to match
	870	* @return TRUE if this ends with the characters in <TT>srcText</TT>,
	871	* FALSE otherwise
	872	* @stable ICU 2.0
	873	*/
	874	inline UBool endsWith(const UnicodeString& srcText,
	875	int32_t srcStart,
	876	int32_t srcLength) const;
	877
	878	/**
	879	* Determine if this ends with the characters in <TT>srcChars</TT>
	880	* @param srcChars The characters to match.
	881	* @param srcLength the number of characters in <TT>srcChars</TT>
	882	* @return TRUE if this ends with the characters in <TT>srcChars</TT>,
	883	* FALSE otherwise
	884	* @stable ICU 2.0
	885	*/
	886	inline UBool endsWith(const UChar *srcChars,
	887	int32_t srcLength) const;
	888
	889	/**
	890	* Determine if this ends with the characters in <TT>srcChars</TT>
	891	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	892	* @param srcChars The characters to match.
	893	* @param srcStart the offset into <TT>srcChars</TT> to start matching
	894	* @param srcLength the number of characters in <TT>srcChars</TT> to match
	895	* @return TRUE if this ends with the characters in <TT>srcChars</TT>,
	896	* FALSE otherwise
	897	* @stable ICU 2.0
	898	*/
	899	inline UBool endsWith(const UChar *srcChars,
	900	int32_t srcStart,
	901	int32_t srcLength) const;
	902
	903
	904	/* Searching - bitwise only */
	905
	906	/**
	907	* Locate in this the first occurrence of the characters in <TT>text</TT>,
	908	* using bitwise comparison.
	909	* @param text The text to search for.
	910	* @return The offset into this of the start of <TT>text</TT>,
	911	* or -1 if not found.
	912	* @stable ICU 2.0
	913	*/
	914	inline int32_t indexOf(const UnicodeString& text) const;
	915
	916	/**
	917	* Locate in this the first occurrence of the characters in <TT>text</TT>
	918	* starting at offset <TT>start</TT>, using bitwise comparison.
	919	* @param text The text to search for.
	920	* @param start The offset at which searching will start.
	921	* @return The offset into this of the start of <TT>text</TT>,
	922	* or -1 if not found.
	923	* @stable ICU 2.0
	924	*/
	925	inline int32_t indexOf(const UnicodeString& text,
	926	int32_t start) const;
	927
	928	/**
	929	* Locate in this the first occurrence in the range
	930	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	931	* in <TT>text</TT>, using bitwise comparison.
	932	* @param text The text to search for.
	933	* @param start The offset at which searching will start.
	934	* @param length The number of characters to search
	935	* @return The offset into this of the start of <TT>text</TT>,
	936	* or -1 if not found.
	937	* @stable ICU 2.0
	938	*/
	939	inline int32_t indexOf(const UnicodeString& text,
	940	int32_t start,
	941	int32_t length) const;
	942
	943	/**
	944	* Locate in this the first occurrence in the range
	945	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	946	* in <TT>srcText</TT> in the range
	947	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
	948	* using bitwise comparison.
	949	* @param srcText The text to search for.
	950	* @param srcStart the offset into <TT>srcText</TT> at which
	951	* to start matching
	952	* @param srcLength the number of characters in <TT>srcText</TT> to match
	953	* @param start the offset into this at which to start matching
	954	* @param length the number of characters in this to search
	955	* @return The offset into this of the start of <TT>srcText</TT>,
	956	* or -1 if not found.
	957	* @stable ICU 2.0
	958	*/
	959	inline int32_t indexOf(const UnicodeString& srcText,
	960	int32_t srcStart,
	961	int32_t srcLength,
	962	int32_t start,
	963	int32_t length) const;
	964
	965	/**
	966	* Locate in this the first occurrence of the characters in
	967	* <TT>srcChars</TT>
	968	* starting at offset <TT>start</TT>, using bitwise comparison.
	969	* @param srcChars The text to search for.
	970	* @param srcLength the number of characters in <TT>srcChars</TT> to match
	971	* @param start the offset into this at which to start matching
	972	* @return The offset into this of the start of <TT>srcChars</TT>,
	973	* or -1 if not found.
	974	* @stable ICU 2.0
	975	*/
	976	inline int32_t indexOf(const UChar *srcChars,
	977	int32_t srcLength,
	978	int32_t start) const;
	979
	980	/**
	981	* Locate in this the first occurrence in the range
	982	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	983	* in <TT>srcChars</TT>, using bitwise comparison.
	984	* @param srcChars The text to search for.
	985	* @param srcLength the number of characters in <TT>srcChars</TT>
	986	* @param start The offset at which searching will start.
	987	* @param length The number of characters to search
	988	* @return The offset into this of the start of <TT>srcChars</TT>,
	989	* or -1 if not found.
	990	* @stable ICU 2.0
	991	*/
	992	inline int32_t indexOf(const UChar *srcChars,
	993	int32_t srcLength,
	994	int32_t start,
	995	int32_t length) const;
	996
	997	/**
	998	* Locate in this the first occurrence in the range
	999	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	1000	* in <TT>srcChars</TT> in the range
	1001	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
	1002	* using bitwise comparison.
	1003	* @param srcChars The text to search for.
	1004	* @param srcStart the offset into <TT>srcChars</TT> at which
	1005	* to start matching
	1006	* @param srcLength the number of characters in <TT>srcChars</TT> to match
	1007	* @param start the offset into this at which to start matching
	1008	* @param length the number of characters in this to search
	1009	* @return The offset into this of the start of <TT>srcChars</TT>,
	1010	* or -1 if not found.
	1011	* @stable ICU 2.0
	1012	*/
	1013	int32_t indexOf(const UChar *srcChars,
	1014	int32_t srcStart,
	1015	int32_t srcLength,
	1016	int32_t start,
	1017	int32_t length) const;
	1018
	1019	/**
	1020	* Locate in this the first occurrence of the BMP code point <code>c</code>,
	1021	* using bitwise comparison.
	1022	* @param c The code unit to search for.
	1023	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1024	* @stable ICU 2.0
	1025	*/
	1026	inline int32_t indexOf(UChar c) const;
	1027
	1028	/**
	1029	* Locate in this the first occurrence of the code point <TT>c</TT>,
	1030	* using bitwise comparison.
	1031	*
	1032	* @param c The code point to search for.
	1033	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1034	* @stable ICU 2.0
	1035	*/
	1036	inline int32_t indexOf(UChar32 c) const;
	1037
	1038	/**
	1039	* Locate in this the first occurrence of the BMP code point <code>c</code>,
	1040	* starting at offset <TT>start</TT>, using bitwise comparison.
	1041	* @param c The code unit to search for.
	1042	* @param start The offset at which searching will start.
	1043	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1044	* @stable ICU 2.0
	1045	*/
	1046	inline int32_t indexOf(UChar c,
	1047	int32_t start) const;
	1048
	1049	/**
	1050	* Locate in this the first occurrence of the code point <TT>c</TT>
	1051	* starting at offset <TT>start</TT>, using bitwise comparison.
	1052	*
	1053	* @param c The code point to search for.
	1054	* @param start The offset at which searching will start.
	1055	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1056	* @stable ICU 2.0
	1057	*/
	1058	inline int32_t indexOf(UChar32 c,
	1059	int32_t start) const;
	1060
	1061	/**
	1062	* Locate in this the first occurrence of the BMP code point <code>c</code>
	1063	* in the range [<TT>start</TT>, <TT>start + length</TT>),
	1064	* using bitwise comparison.
	1065	* @param c The code unit to search for.
	1066	* @param start the offset into this at which to start matching
	1067	* @param length the number of characters in this to search
	1068	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1069	* @stable ICU 2.0
	1070	*/
	1071	inline int32_t indexOf(UChar c,
	1072	int32_t start,
	1073	int32_t length) const;
	1074
	1075	/**
	1076	* Locate in this the first occurrence of the code point <TT>c</TT>
	1077	* in the range [<TT>start</TT>, <TT>start + length</TT>),
	1078	* using bitwise comparison.
	1079	*
	1080	* @param c The code point to search for.
	1081	* @param start the offset into this at which to start matching
	1082	* @param length the number of characters in this to search
	1083	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1084	* @stable ICU 2.0
	1085	*/
	1086	inline int32_t indexOf(UChar32 c,
	1087	int32_t start,
	1088	int32_t length) const;
	1089
	1090	/**
	1091	* Locate in this the last occurrence of the characters in <TT>text</TT>,
	1092	* using bitwise comparison.
	1093	* @param text The text to search for.
	1094	* @return The offset into this of the start of <TT>text</TT>,
	1095	* or -1 if not found.
	1096	* @stable ICU 2.0
	1097	*/
	1098	inline int32_t lastIndexOf(const UnicodeString& text) const;
	1099
	1100	/**
	1101	* Locate in this the last occurrence of the characters in <TT>text</TT>
	1102	* starting at offset <TT>start</TT>, using bitwise comparison.
	1103	* @param text The text to search for.
	1104	* @param start The offset at which searching will start.
	1105	* @return The offset into this of the start of <TT>text</TT>,
	1106	* or -1 if not found.
	1107	* @stable ICU 2.0
	1108	*/
	1109	inline int32_t lastIndexOf(const UnicodeString& text,
	1110	int32_t start) const;
	1111
	1112	/**
	1113	* Locate in this the last occurrence in the range
	1114	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	1115	* in <TT>text</TT>, using bitwise comparison.
	1116	* @param text The text to search for.
	1117	* @param start The offset at which searching will start.
	1118	* @param length The number of characters to search
	1119	* @return The offset into this of the start of <TT>text</TT>,
	1120	* or -1 if not found.
	1121	* @stable ICU 2.0
	1122	*/
	1123	inline int32_t lastIndexOf(const UnicodeString& text,
	1124	int32_t start,
	1125	int32_t length) const;
	1126
	1127	/**
	1128	* Locate in this the last occurrence in the range
	1129	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	1130	* in <TT>srcText</TT> in the range
	1131	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
	1132	* using bitwise comparison.
	1133	* @param srcText The text to search for.
	1134	* @param srcStart the offset into <TT>srcText</TT> at which
	1135	* to start matching
	1136	* @param srcLength the number of characters in <TT>srcText</TT> to match
	1137	* @param start the offset into this at which to start matching
	1138	* @param length the number of characters in this to search
	1139	* @return The offset into this of the start of <TT>srcText</TT>,
	1140	* or -1 if not found.
	1141	* @stable ICU 2.0
	1142	*/
	1143	inline int32_t lastIndexOf(const UnicodeString& srcText,
	1144	int32_t srcStart,
	1145	int32_t srcLength,
	1146	int32_t start,
	1147	int32_t length) const;
	1148
	1149	/**
	1150	* Locate in this the last occurrence of the characters in <TT>srcChars</TT>
	1151	* starting at offset <TT>start</TT>, using bitwise comparison.
	1152	* @param srcChars The text to search for.
	1153	* @param srcLength the number of characters in <TT>srcChars</TT> to match
	1154	* @param start the offset into this at which to start matching
	1155	* @return The offset into this of the start of <TT>srcChars</TT>,
	1156	* or -1 if not found.
	1157	* @stable ICU 2.0
	1158	*/
	1159	inline int32_t lastIndexOf(const UChar *srcChars,
	1160	int32_t srcLength,
	1161	int32_t start) const;
	1162
	1163	/**
	1164	* Locate in this the last occurrence in the range
	1165	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	1166	* in <TT>srcChars</TT>, using bitwise comparison.
	1167	* @param srcChars The text to search for.
	1168	* @param srcLength the number of characters in <TT>srcChars</TT>
	1169	* @param start The offset at which searching will start.
	1170	* @param length The number of characters to search
	1171	* @return The offset into this of the start of <TT>srcChars</TT>,
	1172	* or -1 if not found.
	1173	* @stable ICU 2.0
	1174	*/
	1175	inline int32_t lastIndexOf(const UChar *srcChars,
	1176	int32_t srcLength,
	1177	int32_t start,
	1178	int32_t length) const;
	1179
	1180	/**
	1181	* Locate in this the last occurrence in the range
	1182	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	1183	* in <TT>srcChars</TT> in the range
	1184	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
	1185	* using bitwise comparison.
	1186	* @param srcChars The text to search for.
	1187	* @param srcStart the offset into <TT>srcChars</TT> at which
	1188	* to start matching
	1189	* @param srcLength the number of characters in <TT>srcChars</TT> to match
	1190	* @param start the offset into this at which to start matching
	1191	* @param length the number of characters in this to search
	1192	* @return The offset into this of the start of <TT>srcChars</TT>,
	1193	* or -1 if not found.
	1194	* @stable ICU 2.0
	1195	*/
	1196	int32_t lastIndexOf(const UChar *srcChars,
	1197	int32_t srcStart,
	1198	int32_t srcLength,
	1199	int32_t start,
	1200	int32_t length) const;
	1201
	1202	/**
	1203	* Locate in this the last occurrence of the BMP code point <code>c</code>,
	1204	* using bitwise comparison.
	1205	* @param c The code unit to search for.
	1206	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1207	* @stable ICU 2.0
	1208	*/
	1209	inline int32_t lastIndexOf(UChar c) const;
	1210
	1211	/**
	1212	* Locate in this the last occurrence of the code point <TT>c</TT>,
	1213	* using bitwise comparison.
	1214	*
	1215	* @param c The code point to search for.
	1216	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1217	* @stable ICU 2.0
	1218	*/
	1219	inline int32_t lastIndexOf(UChar32 c) const;
	1220
	1221	/**
	1222	* Locate in this the last occurrence of the BMP code point <code>c</code>
	1223	* starting at offset <TT>start</TT>, using bitwise comparison.
	1224	* @param c The code unit to search for.
	1225	* @param start The offset at which searching will start.
	1226	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1227	* @stable ICU 2.0
	1228	*/
	1229	inline int32_t lastIndexOf(UChar c,
	1230	int32_t start) const;
	1231
	1232	/**
	1233	* Locate in this the last occurrence of the code point <TT>c</TT>
	1234	* starting at offset <TT>start</TT>, using bitwise comparison.
	1235	*
	1236	* @param c The code point to search for.
	1237	* @param start The offset at which searching will start.
	1238	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1239	* @stable ICU 2.0
	1240	*/
	1241	inline int32_t lastIndexOf(UChar32 c,
	1242	int32_t start) const;
	1243
	1244	/**
	1245	* Locate in this the last occurrence of the BMP code point <code>c</code>
	1246	* in the range [<TT>start</TT>, <TT>start + length</TT>),
	1247	* using bitwise comparison.
	1248	* @param c The code unit to search for.
	1249	* @param start the offset into this at which to start matching
	1250	* @param length the number of characters in this to search
	1251	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1252	* @stable ICU 2.0
	1253	*/
	1254	inline int32_t lastIndexOf(UChar c,
	1255	int32_t start,
	1256	int32_t length) const;
	1257
	1258	/**
	1259	* Locate in this the last occurrence of the code point <TT>c</TT>
	1260	* in the range [<TT>start</TT>, <TT>start + length</TT>),
	1261	* using bitwise comparison.
	1262	*
	1263	* @param c The code point to search for.
	1264	* @param start the offset into this at which to start matching
	1265	* @param length the number of characters in this to search
	1266	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1267	* @stable ICU 2.0
	1268	*/
	1269	inline int32_t lastIndexOf(UChar32 c,
	1270	int32_t start,
	1271	int32_t length) const;
	1272
	1273
	1274	/* Character access */
	1275
	1276	/**
	1277	* Return the code unit at offset <tt>offset</tt>.
	1278	* If the offset is not valid (0..length()-1) then U+ffff is returned.
	1279	* @param offset a valid offset into the text
	1280	* @return the code unit at offset <tt>offset</tt>
	1281	* or 0xffff if the offset is not valid for this string
	1282	* @stable ICU 2.0
	1283	*/
	1284	inline UChar charAt(int32_t offset) const;
	1285
	1286	/**
	1287	* Return the code unit at offset <tt>offset</tt>.
	1288	* If the offset is not valid (0..length()-1) then U+ffff is returned.
	1289	* @param offset a valid offset into the text
	1290	* @return the code unit at offset <tt>offset</tt>
	1291	* @stable ICU 2.0
	1292	*/
	1293	inline UChar operator[] (int32_t offset) const;
	1294
	1295	/**
	1296	* Return the code point that contains the code unit
	1297	* at offset <tt>offset</tt>.
	1298	* If the offset is not valid (0..length()-1) then U+ffff is returned.
	1299	* @param offset a valid offset into the text
	1300	* that indicates the text offset of any of the code units
	1301	* that will be assembled into a code point (21-bit value) and returned
	1302	* @return the code point of text at <tt>offset</tt>
	1303	* or 0xffff if the offset is not valid for this string
	1304	* @stable ICU 2.0
	1305	*/
	1306	UChar32 char32At(int32_t offset) const;
	1307
	1308	/**
	1309	* Adjust a random-access offset so that
	1310	* it points to the beginning of a Unicode character.
	1311	* The offset that is passed in points to
	1312	* any code unit of a code point,
	1313	* while the returned offset will point to the first code unit
	1314	* of the same code point.
	1315	* In UTF-16, if the input offset points to a second surrogate
	1316	* of a surrogate pair, then the returned offset will point
	1317	* to the first surrogate.
	1318	* @param offset a valid offset into one code point of the text
	1319	* @return offset of the first code unit of the same code point
	1320	* @see U16_SET_CP_START
	1321	* @stable ICU 2.0
	1322	*/
	1323	int32_t getChar32Start(int32_t offset) const;
	1324
	1325	/**
	1326	* Adjust a random-access offset so that
	1327	* it points behind a Unicode character.
	1328	* The offset that is passed in points behind
	1329	* any code unit of a code point,
	1330	* while the returned offset will point behind the last code unit
	1331	* of the same code point.
	1332	* In UTF-16, if the input offset points behind the first surrogate
	1333	* (i.e., to the second surrogate)
	1334	* of a surrogate pair, then the returned offset will point
	1335	* behind the second surrogate (i.e., to the first code unit after the pair).
	1336	* @param offset a valid offset after any code unit of a code point of the text
	1337	* @return offset of the first code unit after the same code point
	1338	* @see U16_SET_CP_LIMIT
	1339	* @stable ICU 2.0
	1340	*/
	1341	int32_t getChar32Limit(int32_t offset) const;
	1342
	1343	/**
	1344	* Move the code unit index along the string by delta code points.
	1345	* Interpret the input index as a code unit-based offset into the string,
	1346	* move the index forward or backward by delta code points, and
	1347	* return the resulting index.
	1348	* The input index should point to the first code unit of a code point,
	1349	* if there is more than one.
	1350	*
	1351	* Both input and output indexes are code unit-based as for all
	1352	* string indexes/offsets in ICU (and other libraries, like MBCS char*).
	1353	* If delta<0 then the index is moved backward (toward the start of the string).
	1354	* If delta>0 then the index is moved forward (toward the end of the string).
	1355	*
	1356	* This behaves like CharacterIterator::move32(delta, kCurrent).
	1357	*
	1358	* Behavior for out-of-bounds indexes:
	1359	* <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
	1360	* if the input index<0 then it is pinned to 0;
	1361	* if it is index>length() then it is pinned to length().
	1362	* Afterwards, the index is moved by <code>delta</code> code points
	1363	* forward or backward,
	1364	* but no further backward than to 0 and no further forward than to length().
	1365	* The resulting index return value will be in between 0 and length(), inclusively.
	1366	*
	1367	* Examples:
	1368	* <pre>
	1369	* // s has code points 'a' U+10000 'b' U+10ffff U+2029
	1370	* UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
	1371	*
	1372	* // initial index: position of U+10000
	1373	* int32_t index=1;
	1374	*
	1375	* // the following examples will all result in index==4, position of U+10ffff
	1376	*
	1377	* // skip 2 code points from some position in the string
	1378	* index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
	1379	*
	1380	* // go to the 3rd code point from the start of s (0-based)
	1381	* index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
	1382	*
	1383	* // go to the next-to-last code point of s
	1384	* index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
	1385	* </pre>
	1386	*
	1387	* @param index input code unit index
	1388	* @param delta (signed) code point count to move the index forward or backward
	1389	* in the string
	1390	* @return the resulting code unit index
	1391	* @stable ICU 2.0
	1392	*/
	1393	int32_t moveIndex32(int32_t index, int32_t delta) const;
	1394
	1395	/* Substring extraction */
	1396
	1397	/**
	1398	* Copy the characters in the range
	1399	* [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
	1400	* beginning at <tt>dstStart</tt>.
	1401	* If the string aliases to <code>dst</code> itself as an external buffer,
	1402	* then extract() will not copy the contents.
	1403	*
	1404	* @param start offset of first character which will be copied into the array
	1405	* @param length the number of characters to extract
	1406	* @param dst array in which to copy characters. The length of <tt>dst</tt>
	1407	* must be at least (<tt>dstStart + length</tt>).
	1408	* @param dstStart the offset in <TT>dst</TT> where the first character
	1409	* will be extracted
	1410	* @stable ICU 2.0
	1411	*/
	1412	inline void extract(int32_t start,
	1413	int32_t length,
	1414	UChar *dst,
	1415	int32_t dstStart = 0) const;
	1416
	1417	/**
	1418	* Copy the contents of the string into dest.
	1419	* This is a convenience function that
	1420	* checks if there is enough space in dest,
	1421	* extracts the entire string if possible,
	1422	* and NUL-terminates dest if possible.
	1423	*
	1424	* If the string fits into dest but cannot be NUL-terminated
	1425	* (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
	1426	* If the string itself does not fit into dest
	1427	* (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
	1428	*
	1429	* If the string aliases to <code>dest</code> itself as an external buffer,
	1430	* then extract() will not copy the contents.
	1431	*
	1432	* @param dest Destination string buffer.
	1433	* @param destCapacity Number of UChars available at dest.
	1434	* @param errorCode ICU error code.
	1435	* @return length()
	1436	* @stable ICU 2.0
	1437	*/
	1438	int32_t
	1439	extract(UChar *dest, int32_t destCapacity,
	1440	UErrorCode &errorCode) const;
	1441
	1442	/**
	1443	* Copy the characters in the range
	1444	* [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
	1445	* <tt>target</tt>.
	1446	* @param start offset of first character which will be copied
	1447	* @param length the number of characters to extract
	1448	* @param target UnicodeString into which to copy characters;
	1449	* the extracted characters are written here (the function itself returns void).
	1450	* @stable ICU 2.0
	1451	*/
	1452	inline void extract(int32_t start,
	1453	int32_t length,
	1454	UnicodeString& target) const;
	1455
	1456	/**
	1457	* Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
	1458	* into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
	1459	* @param start offset of first character which will be copied into the array
	1460	* @param limit offset immediately following the last character to be copied
	1461	* @param dst array in which to copy characters. The length of <tt>dst</tt>
	1462	* must be at least (<tt>dstStart + (limit - start)</tt>).
	1463	* @param dstStart the offset in <TT>dst</TT> where the first character
	1464	* will be extracted
	1465	* @stable ICU 2.0
	1466	*/
	1467	inline void extractBetween(int32_t start,
	1468	int32_t limit,
	1469	UChar *dst,
	1470	int32_t dstStart = 0) const;
	1471
	1472	/**
	1473	* Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
	1474	* into the UnicodeString <tt>target</tt>. Replaceable API.
	1475	* @param start offset of first character which will be copied
	1476	* @param limit offset immediately following the last character to be copied
	1477	* @param target UnicodeString into which to copy characters;
	1478	* the extracted characters are written here (the function itself returns void).
	1479	* @stable ICU 2.0
	1480	*/
	1481	virtual void extractBetween(int32_t start,
	1482	int32_t limit,
	1483	UnicodeString& target) const;
	1484
	1485	/**
	1486	* Copy the characters in the range
	1487	* [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
	1488	* All characters must be invariant (see utypes.h).
	1489	* Use US_INV as the last, signature-distinguishing parameter.
	1490	*
	1491	* This function does not write any more than <code>targetCapacity</code>
	1492	* characters but returns the length of the entire output string
	1493	* so that one can allocate a larger buffer and call the function again
	1494	* if necessary.
	1495	* The output string is NUL-terminated if possible.
	1496	*
	1497	* @param start offset of first character which will be copied
	1498	* @param startLength the number of characters to extract
	1499	* @param target the target buffer for extraction, can be NULL
	1500	* if targetCapacity is 0
	1501	* @param targetCapacity the length of the target buffer
	1502	* @param inv Signature-distinguishing parameter, use US_INV.
	1503	* @return the output string length, not including the terminating NUL
	1504	* @stable ICU 3.2
	1505	*/
	1506	int32_t extract(int32_t start,
	1507	int32_t startLength,
	1508	char *target,
	1509	int32_t targetCapacity,
	1510	enum EInvariant inv) const;
	1511
	1512	#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
	1513
	1514	/**
	1515	* Copy the characters in the range
	1516	* [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
	1517	* in the platform's default codepage.
	1518	* This function does not write any more than <code>targetLength</code>
	1519	* characters but returns the length of the entire output string
	1520	* so that one can allocate a larger buffer and call the function again
	1521	* if necessary.
	1522	* The output string is NUL-terminated if possible.
	1523	*
	1524	* @param start offset of first character which will be copied
	1525	* @param startLength the number of characters to extract
	1526	* @param target the target buffer for extraction
	1527	* @param targetLength the length of the target buffer
	1528	* If <TT>target</TT> is NULL, then the number of bytes required for
	1529	* <TT>target</TT> is returned.
	1530	* @return the output string length, not including the terminating NUL
	1531	* @stable ICU 2.0
	1532	*/
	1533	int32_t extract(int32_t start,
	1534	int32_t startLength,
	1535	char *target,
	1536	uint32_t targetLength) const;
	1537
	1538	#endif
	1539
	1540	#if !UCONFIG_NO_CONVERSION
	1541
	1542	/**
	1543	* Copy the characters in the range
	1544	* [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
	1545	* in a specified codepage.
	1546	* The output string is NUL-terminated.
	1547	*
	1548	* Recommendation: For invariant-character strings use
	1549	* extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
	1550	* because it avoids object code dependencies of UnicodeString on
	1551	* the conversion code.
	1552	*
	1553	* @param start offset of first character which will be copied
	1554	* @param startLength the number of characters to extract
	1555	* @param target the target buffer for extraction
	1556	* @param codepage the desired codepage for the characters. 0 has
	1557	* the special meaning of the default codepage
	1558	* If <code>codepage</code> is an empty string (<code>""</code>),
	1559	* then a simple conversion is performed on the codepage-invariant
	1560	* subset ("invariant characters") of the platform encoding. See utypes.h.
	1561	* If <TT>target</TT> is NULL, then the number of bytes required for
	1562	* <TT>target</TT> is returned. It is assumed that the target is big enough
	1563	* to fit all of the characters.
	1564	* @return the output string length, not including the terminating NUL
	1565	* @stable ICU 2.0
	1566	*/
	1567	inline int32_t extract(int32_t start,
	1568	int32_t startLength,
	1569	char *target,
	1570	const char *codepage = 0) const;
	1571
	1572	/**
	1573	* Copy the characters in the range
	1574	* [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
	1575	* in a specified codepage.
	1576	* This function does not write any more than <code>targetLength</code>
	1577	* characters but returns the length of the entire output string
	1578	* so that one can allocate a larger buffer and call the function again
	1579	* if necessary.
	1580	* The output string is NUL-terminated if possible.
	1581	*
	1582	* Recommendation: For invariant-character strings use
	1583	* extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
	1584	* because it avoids object code dependencies of UnicodeString on
	1585	* the conversion code.
	1586	*
	1587	* @param start offset of first character which will be copied
	1588	* @param startLength the number of characters to extract
	1589	* @param target the target buffer for extraction
	1590	* @param targetLength the length of the target buffer
	1591	* @param codepage the desired codepage for the characters. 0 has
	1592	* the special meaning of the default codepage
	1593	* If <code>codepage</code> is an empty string (<code>""</code>),
	1594	* then a simple conversion is performed on the codepage-invariant
	1595	* subset ("invariant characters") of the platform encoding. See utypes.h.
	1596	* If <TT>target</TT> is NULL, then the number of bytes required for
	1597	* <TT>target</TT> is returned.
	1598	* @return the output string length, not including the terminating NUL
	1599	* @stable ICU 2.0
	1600	*/
	1601	int32_t extract(int32_t start,
	1602	int32_t startLength,
	1603	char *target,
	1604	uint32_t targetLength,
	1605	const char *codepage) const;
	1606
	1607	/**
	1608	* Convert the UnicodeString into a codepage string using an existing UConverter.
	1609	* The output string is NUL-terminated if possible.
	1610	*
	1611	* This function avoids the overhead of opening and closing a converter if
	1612	* multiple strings are extracted.
	1613	*
	1614	* @param dest destination string buffer, can be NULL if destCapacity==0
	1615	* @param destCapacity the number of chars available at dest
	1616	* @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
	1617	* or NULL for the default converter
	1618	* @param errorCode normal ICU error code
	1619	* @return the length of the output string, not counting the terminating NUL;
	1620	* if the length is greater than destCapacity, then the string will not fit
	1621	* and a buffer of the indicated length would need to be passed in
	1622	* @stable ICU 2.0
	1623	*/
	1624	int32_t extract(char *dest, int32_t destCapacity,
	1625	UConverter *cnv,
	1626	UErrorCode &errorCode) const;
	1627
	1628	#endif
	1629
	1630	/**
	1631	* Create a temporary substring for the specified range.
	1632	* Unlike the substring constructor and setTo() functions,
	1633	* the object returned here will be a read-only alias (using getBuffer())
	1634	* rather than copying the text.
	1635	* As a result, this substring operation is much faster but requires
	1636	* that the original string not be modified or deleted during the lifetime
	1637	* of the returned substring object.
	1638	* @param start offset of the first character visible in the substring
	1639	* @param length length of the substring
	1640	* @return a read-only alias UnicodeString object for the substring
	1641	* @stable ICU 4.4
	1642	*/
	1643	UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
	1644
	1645	/**
	1646	* Create a temporary substring for the specified range.
	1647	* Same as tempSubString(start, length) except that the substring range
	1648	* is specified as a (start, limit) pair (with an exclusive limit index)
	1649	* rather than a (start, length) pair.
	1650	* @param start offset of the first character visible in the substring
	1651	* @param limit offset immediately following the last character visible in the substring
	1652	* @return a read-only alias UnicodeString object for the substring
	1653	* @stable ICU 4.4
	1654	*/
	1655	inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
	1656
	1657	/**
	1658	* Convert the UnicodeString to UTF-8 and write the result
	1659	* to a ByteSink. This is called by toUTF8String().
	1660	* Unpaired surrogates are replaced with U+FFFD.
	1661	* Calls u_strToUTF8WithSub().
	1662	*
	1663	* @param sink A ByteSink to which the UTF-8 version of the string is written.
	1664	* sink.Flush() is called at the end.
	1665	* @stable ICU 4.2
	1666	* @see toUTF8String
	1667	*/
	1668	void toUTF8(ByteSink &sink) const;
	1669
	1670	#if U_HAVE_STD_STRING
	1671
	1672	/**
	1673	* Append the UTF-8 form of this UnicodeString to a standard string.
	1674	* The conversion is delegated to toUTF8(), writing through a
	1675	* StringByteSink that wraps <code>result</code>.
	1676	* Unpaired surrogates are replaced with U+FFFD.
	1677	*
	1678	* @param result A standard string (or a compatible object);
	1679	* the UTF-8 text is appended to it.
	1680	* @return A reference to <code>result</code>.
	1681	* @stable ICU 4.2
	1682	* @see toUTF8
	1683	*/
	1684	template<typename StringClass>
	1685	StringClass &toUTF8String(StringClass &result) const {
	1686	StringByteSink<StringClass> utf8Sink(&result);
	1687	this->toUTF8(utf8Sink);
	1688	return result;
	1689	}
	1690
	1691	#endif
	1692
	1693	/**
	1694	* Convert the UnicodeString to UTF-32.
	1695	* Unpaired surrogates are replaced with U+FFFD.
	1696	* Calls u_strToUTF32WithSub().
	1697	*
	1698	* @param utf32 destination string buffer, can be NULL if capacity==0
	1699	* @param capacity the number of UChar32s available at utf32
	1700	* @param errorCode Standard ICU error code. Its input value must
	1701	* pass the U_SUCCESS() test, or else the function returns
	1702	* immediately. Check for U_FAILURE() on output or use with
	1703	* function chaining. (See User Guide for details.)
	1704	* @return The length of the UTF-32 string.
	1705	* @see fromUTF32
	1706	* @stable ICU 4.2
	1707	*/
	1708	int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
	1709
	1710	/* Length operations */
	1711
	1712	/**
	1713	* Return the length of the UnicodeString object.
	1714	* The length is the number of UChar code units in the UnicodeString.
	1715	* If you want the number of code points, please use countChar32().
	1716	* @return the length of the UnicodeString object
	1717	* @see countChar32
	1718	* @stable ICU 2.0
	1719	*/
	1720	inline int32_t length(void) const;
	1721
	1722	/**
	1723	* Count Unicode code points in the length UChar code units of the string.
	1724	* A code point may occupy either one or two UChar code units.
	1725	* Counting code points involves reading all code units.
	1726	*
	1727	* This function is basically the inverse of moveIndex32().
	1728	*
	1729	* @param start the index of the first code unit to check
	1730	* @param length the number of UChar code units to check
	1731	* @return the number of code points in the specified code units
	1732	* @see length
	1733	* @stable ICU 2.0
	1734	*/
	1735	int32_t
	1736	countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
	1737
	1738	/**
	1739	* Check if the length UChar code units of the string
	1740	* contain more Unicode code points than a certain number.
	1741	* This is more efficient than counting all code points in this part of the string
	1742	* and comparing that number with a threshold.
	1743	* This function may not need to scan the string at all if the length
	1744	* falls within a certain range, and
	1745	* never needs to count more than 'number+1' code points.
	1746	* Logically equivalent to (countChar32(start, length)>number).
	1747	* A Unicode code point may occupy either one or two UChar code units.
	1748	*
	1749	* @param start the index of the first code unit to check (0 for the entire string)
	1750	* @param length the number of UChar code units to check
	1751	* (use INT32_MAX for the entire string; remember that start/length
	1752	* values are pinned)
	1753	* @param number The number of code points in the (sub)string is compared against
	1754	* the 'number' parameter.
	1755	* @return Boolean value for whether the string contains more Unicode code points
	1756	* than 'number'. Same as (u_countChar32(s, length)>number).
	1757	* @see countChar32
	1758	* @see u_strHasMoreChar32Than
	1759	* @stable ICU 2.4
	1760	*/
	1761	UBool
	1762	hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
	1763
	1764	/**
	1765	* Determine if this string is empty.
	1766	* @return TRUE if this string contains 0 characters, FALSE otherwise.
	1767	* @stable ICU 2.0
	1768	*/
	1769	inline UBool isEmpty(void) const;
	1770
	1771	/**
	1772	* Return the capacity of the internal buffer of the UnicodeString object.
	1773	* This is useful together with the getBuffer functions.
	1774	* See there for details.
	1775	*
	1776	* @return the number of UChars available in the internal buffer
	1777	* @see getBuffer
	1778	* @stable ICU 2.0
	1779	*/
	1780	inline int32_t getCapacity(void) const;
	1781
	1782	/* Other operations */
	1783
	1784	/**
	1785	* Generate a hash code for this object.
	1786	* @return The hash code of this UnicodeString.
	1787	* @stable ICU 2.0
	1788	*/
	1789	inline int32_t hashCode(void) const;
	1790
	1791	/**
	1792	* Determine whether this string is bogus, i.e. does not contain a valid string.
	1793	* A bogus string has no value. It is different from an empty string,
	1794	* although in both cases isEmpty() returns TRUE and length() returns 0.
	1795	* setToBogus() and isBogus() can be used to indicate that no string value is available.
	1796	* For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
	1797	* length() returns 0.
	1798	*
	1799	* @return TRUE if the string is bogus (invalid), FALSE otherwise
	1800	* @see setToBogus()
	1801	* @stable ICU 2.0
	1802	*/
	1803	inline UBool isBogus(void) const;
	1804
	1805
	1806	//========================================
	1807	// Write operations
	1808	//========================================
	1809
	1810	/* Assignment operations */
	1811
	1812	/**
	1813	* Assignment operator. Replace the characters in this UnicodeString
	1814	* with the characters from <TT>srcText</TT>.
	1815	* @param srcText The text containing the characters to replace
	1816	* @return a reference to this
	1817	* @stable ICU 2.0
	1818	*/
	1819	UnicodeString &operator=(const UnicodeString &srcText);
	1820
	1821	/**
	1822	* Almost the same as the assignment operator.
	1823	* Replace the characters in this UnicodeString
	1824	* with the characters from <code>srcText</code>.
	1825	*
	1826	* This function works the same as the assignment operator
	1827	* for all strings except for ones that are readonly aliases.
	1828	*
	1829	* Starting with ICU 2.4, the assignment operator and the copy constructor
	1830	* allocate a new buffer and copy the buffer contents even for readonly aliases.
	1831	* This function implements the old, more efficient but less safe behavior
	1832	* of making this string also a readonly alias to the same buffer.
	1833	*
	1834	* The fastCopyFrom function must be used only if it is known that the lifetime of
	1835	* this UnicodeString does not exceed the lifetime of the aliased buffer
	1836	* including its contents, for example for strings from resource bundles
	1837	* or aliases to string constants.
	1838	*
	1839	* @param src The text containing the characters to replace.
	1840	* @return a reference to this
	1841	* @stable ICU 2.4
	1842	*/
	1843	UnicodeString &fastCopyFrom(const UnicodeString &src);
	1844
	1845	/**
	1846	* Assignment operator. Replace the characters in this UnicodeString
	1847	* with the code unit <TT>ch</TT>.
	1848	* @param ch the code unit to replace
	1849	* @return a reference to this
	1850	* @stable ICU 2.0
	1851	*/
	1852	inline UnicodeString& operator= (UChar ch);
	1853
	1854	/**
	1855	* Assignment operator. Replace the characters in this UnicodeString
	1856	* with the code point <TT>ch</TT>.
	1857	* @param ch the code point to replace
	1858	* @return a reference to this
	1859	* @stable ICU 2.0
	1860	*/
	1861	inline UnicodeString& operator= (UChar32 ch);
	1862
	1863	/**
	1864	* Set the text in the UnicodeString object to the characters
	1865	* in <TT>srcText</TT> in the range
	1866	* [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
	1867	* <TT>srcText</TT> is not modified.
	1868	* @param srcText the source for the new characters
	1869	* @param srcStart the offset into <TT>srcText</TT> where new characters
	1870	* will be obtained
	1871	* @return a reference to this
	1872	* @stable ICU 2.2
	1873	*/
	1874	inline UnicodeString& setTo(const UnicodeString& srcText,
	1875	int32_t srcStart);
	1876
	1877	/**
	1878	* Set the text in the UnicodeString object to the characters
	1879	* in <TT>srcText</TT> in the range
	1880	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	1881	* <TT>srcText</TT> is not modified.
	1882	* @param srcText the source for the new characters
	1883	* @param srcStart the offset into <TT>srcText</TT> where new characters
	1884	* will be obtained
	1885	* @param srcLength the number of characters in <TT>srcText</TT> in the
	1886	* replace string.
	1887	* @return a reference to this
	1888	* @stable ICU 2.0
	1889	*/
	1890	inline UnicodeString& setTo(const UnicodeString& srcText,
	1891	int32_t srcStart,
	1892	int32_t srcLength);
	1893
	1894	/**
	1895	* Set the text in the UnicodeString object to the characters in
	1896	* <TT>srcText</TT>.
	1897	* <TT>srcText</TT> is not modified.
	1898	* @param srcText the source for the new characters
	1899	* @return a reference to this
	1900	* @stable ICU 2.0
	1901	*/
	1902	inline UnicodeString& setTo(const UnicodeString& srcText);
	1903
	1904	/**
	1905	* Set the characters in the UnicodeString object to the characters
	1906	* in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
	1907	* @param srcChars the source for the new characters
	1908	* @param srcLength the number of Unicode characters in srcChars.
	1909	* @return a reference to this
	1910	* @stable ICU 2.0
	1911	*/
	1912	inline UnicodeString& setTo(const UChar *srcChars,
	1913	int32_t srcLength);
	1914
	1915	/**
	1916	* Set the characters in the UnicodeString object to the code unit
	1917	* <TT>srcChar</TT>.
	1918	* @param srcChar the code unit which becomes the UnicodeString's character
	1919	* content
	1920	* @return a reference to this
	1921	* @stable ICU 2.0
	1922	*/
	1923	UnicodeString& setTo(UChar srcChar);
	1924
	1925	/**
	1926	* Set the characters in the UnicodeString object to the code point
	1927	* <TT>srcChar</TT>.
	1928	* @param srcChar the code point which becomes the UnicodeString's character
	1929	* content
	1930	* @return a reference to this
	1931	* @stable ICU 2.0
	1932	*/
	1933	UnicodeString& setTo(UChar32 srcChar);
	1934
	1935	/**
	1936	* Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
	1937	* The text will be used for the UnicodeString object, but
	1938	* it will not be released when the UnicodeString is destroyed.
	1939	* This has copy-on-write semantics:
	1940	* When the string is modified, then the buffer is first copied into
	1941	* newly allocated memory.
	1942	* The aliased buffer is never modified.
	1943	*
	1944	* In an assignment to another UnicodeString, when using the copy constructor
	1945	* or the assignment operator, the text will be copied.
	1946	* When using fastCopyFrom(), the text will be aliased again,
	1947	* so that both strings then alias the same readonly-text.
	1948	*
	1949	* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
	1950	* This must be true if <code>textLength==-1</code>.
	1951	* @param text The characters to alias for the UnicodeString.
	1952	* @param textLength The number of Unicode characters in <code>text</code> to alias.
	1953	* If -1, then this constructor will determine the length
	1954	* by calling <code>u_strlen()</code>.
	1955	* @return a reference to this
	1956	* @stable ICU 2.0
	1957	*/
	1958	UnicodeString &setTo(UBool isTerminated,
	1959	const UChar *text,
	1960	int32_t textLength);
	1961
	1962	/**
	1963	* Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
	1964	* The text will be used for the UnicodeString object, but
	1965	* it will not be released when the UnicodeString is destroyed.
	1966	* This has write-through semantics:
	1967	* For as long as the capacity of the buffer is sufficient, write operations
	1968	* will directly affect the buffer. When more capacity is necessary, then
	1969	* a new buffer will be allocated and the contents copied as with regularly
	1970	* constructed strings.
	1971	* In an assignment to another UnicodeString, the buffer will be copied.
	1972	* The extract(UChar *dst) function detects whether the dst pointer is the same
	1973	* as the string buffer itself and will in this case not copy the contents.
	1974	*
	1975	* @param buffer The characters to alias for the UnicodeString.
	1976	* @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
	1977	* @param buffCapacity The size of <code>buffer</code> in UChars.
	1978	* @return a reference to this
	1979	* @stable ICU 2.0
	1980	*/
	1981	UnicodeString &setTo(UChar *buffer,
	1982	int32_t buffLength,
	1983	int32_t buffCapacity);
	1984
	1985	/**
	1986	* Make this UnicodeString object invalid.
	1987	* The string will test TRUE with isBogus().
	1988	*
	1989	* A bogus string has no value. It is different from an empty string.
	1990	* It can be used to indicate that no string value is available.
	1991	* getBuffer() and getTerminatedBuffer() return NULL, and
	1992	* length() returns 0.
	1993	*
	1994	* This utility function is used throughout the UnicodeString
	1995	* implementation to indicate that a UnicodeString operation failed,
	1996	* and may be used in other functions,
	1997	* especially but not exclusively when such functions do not
	1998	* take a UErrorCode for simplicity.
	1999	*
	2000	* The following methods, and no others, will clear a string object's bogus flag:
	2001	* - remove()
	2002	* - remove(0, INT32_MAX)
	2003	* - truncate(0)
	2004	* - operator=() (assignment operator)
	2005	* - setTo(...)
	2006	*
	2007	* The simplest way to turn a bogus string into an empty one
	2008	* is to use the remove() function.
	2009	* Examples for other functions that are equivalent to "set to empty string":
	2010	* \code
	2011	* if(s.isBogus()) {
	2012	* s.remove(); // set to an empty string (remove all), or
	2013	* s.remove(0, INT32_MAX); // set to an empty string (remove all), or
	2014	* s.truncate(0); // set to an empty string (complete truncation), or
	2015	* s=UnicodeString(); // assign an empty string, or
	2016	* s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
	2017	* static const UChar nul=0;
	2018	* s.setTo(&nul, 0); // set to an empty C Unicode string
	2019	* }
	2020	* \endcode
	2021	*
	2022	* @see isBogus()
	2023	* @stable ICU 2.0
	2024	*/
	2025	void setToBogus();
	2026
	2027	/**
	2028	* Set the character at the specified offset to the specified character.
	2029	* @param offset A valid offset into the text of the character to set
	2030	* @param ch The new character
	2031	* @return A reference to this
	2032	* @stable ICU 2.0
	2033	*/
	2034	UnicodeString& setCharAt(int32_t offset,
	2035	UChar ch);
	2036
	2037
	2038	/* Append operations */
	2039
	2040	/**
	2041	* Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
	2042	* object.
	2043	* @param ch the code unit to be appended
	2044	* @return a reference to this
	2045	* @stable ICU 2.0
	2046	*/
	2047	inline UnicodeString& operator+= (UChar ch);
	2048
	2049	/**
	2050	* Append operator. Append the code point <TT>ch</TT> to the UnicodeString
	2051	* object.
	2052	* @param ch the code point to be appended
	2053	* @return a reference to this
	2054	* @stable ICU 2.0
	2055	*/
	2056	inline UnicodeString& operator+= (UChar32 ch);
	2057
	2058	/**
	2059	* Append operator. Append the characters in <TT>srcText</TT> to the
	2060	* UnicodeString object. <TT>srcText</TT> is not modified.
	2061	* @param srcText the source for the new characters
	2062	* @return a reference to this
	2063	* @stable ICU 2.0
	2064	*/
	2065	inline UnicodeString& operator+= (const UnicodeString& srcText);
	2066
	2067	/**
	2068	* Append the characters
	2069	* in <TT>srcText</TT> in the range
	2070	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
	2071	* end of the UnicodeString object. <TT>srcText</TT>
	2072	* is not modified.
	2073	* @param srcText the source for the new characters
	2074	* @param srcStart the offset into <TT>srcText</TT> where new characters
	2075	* will be obtained
	2076	* @param srcLength the number of characters in <TT>srcText</TT> in
	2077	* the append string
	2078	* @return a reference to this
	2079	* @stable ICU 2.0
	2080	*/
	2081	inline UnicodeString& append(const UnicodeString& srcText,
	2082	int32_t srcStart,
	2083	int32_t srcLength);
	2084
	2085	/**
	2086	* Append the characters in <TT>srcText</TT> to the UnicodeString object.
	2087	* <TT>srcText</TT> is not modified.
	2088	* @param srcText the source for the new characters
	2089	* @return a reference to this
	2090	* @stable ICU 2.0
	2091	*/
	2092	inline UnicodeString& append(const UnicodeString& srcText);
	2093
	2094	/**
	2095	* Append the characters in <TT>srcChars</TT> in the range
	2096	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of the
	2097	* UnicodeString object.
	2098	* <TT>srcChars</TT> is not modified.
	2099	* @param srcChars the source for the new characters
	2100	* @param srcStart the offset into <TT>srcChars</TT> where new characters
	2101	* will be obtained
	2102	* @param srcLength the number of characters in <TT>srcChars</TT> in
	2103	* the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
	2104	* @return a reference to this
	2105	* @stable ICU 2.0
	2106	*/
	2107	inline UnicodeString& append(const UChar *srcChars,
	2108	int32_t srcStart,
	2109	int32_t srcLength);
	2110
	2111	/**
	2112	* Append the characters in <TT>srcChars</TT> to the end of the UnicodeString
	2113	* object. <TT>srcChars</TT> is not modified.
	2114	* @param srcChars the source for the new characters
	2115	* @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
	2116	* can be -1 if <TT>srcChars</TT> is NUL-terminated
	2117	* @return a reference to this
	2118	* @stable ICU 2.0
	2119	*/
	2120	inline UnicodeString& append(const UChar *srcChars,
	2121	int32_t srcLength);
	2122
	2123	/**
	2124	* Append the code unit <TT>srcChar</TT> to the UnicodeString object.
	2125	* @param srcChar the code unit to append
	2126	* @return a reference to this
	2127	* @stable ICU 2.0
	2128	*/
	2129	inline UnicodeString& append(UChar srcChar);
	2130
	2131	/**
	2132	* Append the code point <TT>srcChar</TT> to the UnicodeString object.
	2133	* @param srcChar the code point to append
	2134	* @return a reference to this
	2135	* @stable ICU 2.0
	2136	*/
	2137	UnicodeString& append(UChar32 srcChar);
	2138
	2139
	2140	/* Insert operations */
	2141
	2142	/**
	2143	* Insert the characters in <TT>srcText</TT> in the range
	2144	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
	2145	* object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
	2146	* @param start the offset where the insertion begins
	2147	* @param srcText the source for the new characters
	2148	* @param srcStart the offset into <TT>srcText</TT> where new characters
	2149	* will be obtained
	2150	* @param srcLength the number of characters in <TT>srcText</TT> in
	2151	* the insert string
	2152	* @return a reference to this
	2153	* @stable ICU 2.0
	2154	*/
	2155	inline UnicodeString& insert(int32_t start,
	2156	const UnicodeString& srcText,
	2157	int32_t srcStart,
	2158	int32_t srcLength);
	2159
	2160	/**
	2161	* Insert the characters in <TT>srcText</TT> into the UnicodeString object
	2162	* at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
	2163	* @param start the offset where the insertion begins
	2164	* @param srcText the source for the new characters
	2165	* @return a reference to this
	2166	* @stable ICU 2.0
	2167	*/
	2168	inline UnicodeString& insert(int32_t start,
	2169	const UnicodeString& srcText);
	2170
	2171	/**
	2172	* Insert the characters in <TT>srcChars</TT> in the range
	2173	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
	2174	* object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
	2175	* @param start the offset at which the insertion begins
	2176	* @param srcChars the source for the new characters
	2177	* @param srcStart the offset into <TT>srcChars</TT> where new characters
	2178	* will be obtained
	2179	* @param srcLength the number of characters in <TT>srcChars</TT>
	2180	* in the insert string
	2181	* @return a reference to this
	2182	* @stable ICU 2.0
	2183	*/
	2184	inline UnicodeString& insert(int32_t start,
	2185	const UChar *srcChars,
	2186	int32_t srcStart,
	2187	int32_t srcLength);
	2188
	2189	/**
	2190	* Insert the characters in <TT>srcChars</TT> into the UnicodeString object
	2191	* at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
	2192	* @param start the offset where the insertion begins
	2193	* @param srcChars the source for the new characters
	2194	* @param srcLength the number of Unicode characters in srcChars.
	2195	* @return a reference to this
	2196	* @stable ICU 2.0
	2197	*/
	2198	inline UnicodeString& insert(int32_t start,
	2199	const UChar *srcChars,
	2200	int32_t srcLength);
	2201
	2202	/**
	2203	* Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
	2204	* offset <TT>start</TT>.
	2205	* @param start the offset at which the insertion occurs
	2206	* @param srcChar the code unit to insert
	2207	* @return a reference to this
	2208	* @stable ICU 2.0
	2209	*/
	2210	inline UnicodeString& insert(int32_t start,
	2211	UChar srcChar);
	2212
	2213	/**
	2214	* Insert the code point <TT>srcChar</TT> into the UnicodeString object at
	2215	* offset <TT>start</TT>.
	2216	* @param start the offset at which the insertion occurs
	2217	* @param srcChar the code point to insert
	2218	* @return a reference to this
	2219	* @stable ICU 2.0
	2220	*/
	2221	inline UnicodeString& insert(int32_t start,
	2222	UChar32 srcChar);
	2223
	2224
	2225	/* Replace operations */
	2226
	2227	/**
	2228	* Replace the characters in the range
	2229	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
	2230	* <TT>srcText</TT> in the range
	2231	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	2232	* <TT>srcText</TT> is not modified.
	2233	* @param start the offset at which the replace operation begins
	2234	* @param length the number of characters to replace. The character at
	2235	* <TT>start + length</TT> is not modified.
	2236	* @param srcText the source for the new characters
	2237	* @param srcStart the offset into <TT>srcText</TT> where new characters
	2238	* will be obtained
	2239	* @param srcLength the number of characters in <TT>srcText</TT> in
	2240	* the replace string
	2241	* @return a reference to this
	2242	* @stable ICU 2.0
	2243	*/
	2244	UnicodeString& replace(int32_t start,
	2245	int32_t length,
	2246	const UnicodeString& srcText,
	2247	int32_t srcStart,
	2248	int32_t srcLength);
	2249
	2250	/**
	2251	* Replace the characters in the range
	2252	* [<TT>start</TT>, <TT>start + length</TT>)
	2253	* with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
	2254	* not modified.
	2255	* @param start the offset at which the replace operation begins
	2256	* @param length the number of characters to replace. The character at
	2257	* <TT>start + length</TT> is not modified.
	2258	* @param srcText the source for the new characters
	2259	* @return a reference to this
	2260	* @stable ICU 2.0
	2261	*/
	2262	UnicodeString& replace(int32_t start,
	2263	int32_t length,
	2264	const UnicodeString& srcText);
	2265
	2266	/**
	2267	* Replace the characters in the range
	2268	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
	2269	* <TT>srcChars</TT> in the range
	2270	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
	2271	* is not modified.
	2272	* @param start the offset at which the replace operation begins
	2273	* @param length the number of characters to replace. The character at
	2274	* <TT>start + length</TT> is not modified.
	2275	* @param srcChars the source for the new characters
	2276	* @param srcStart the offset into <TT>srcChars</TT> where new characters
	2277	* will be obtained
	2278	* @param srcLength the number of characters in <TT>srcChars</TT>
	2279	* in the replace string
	2280	* @return a reference to this
	2281	* @stable ICU 2.0
	2282	*/
	2283	UnicodeString& replace(int32_t start,
	2284	int32_t length,
	2285	const UChar *srcChars,
	2286	int32_t srcStart,
	2287	int32_t srcLength);
	2288
	2289	/**
	2290	* Replace the characters in the range
	2291	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
	2292	* <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
	2293	* @param start the offset at which the replace operation begins
	2294	* @param length number of characters to replace. The character at
	2295	* <TT>start + length</TT> is not modified.
	2296	* @param srcChars the source for the new characters
	2297	* @param srcLength the number of Unicode characters in srcChars
	2298	* @return a reference to this
	2299	* @stable ICU 2.0
	2300	*/
	2301	inline UnicodeString& replace(int32_t start,
	2302	int32_t length,
	2303	const UChar *srcChars,
	2304	int32_t srcLength);
	2305
	2306	/**
	2307	* Replace the characters in the range
	2308	* [<TT>start</TT>, <TT>start + length</TT>) with the code unit
	2309	* <TT>srcChar</TT>.
	2310	* @param start the offset at which the replace operation begins
	2311	* @param length the number of characters to replace. The character at
	2312	* <TT>start + length</TT> is not modified.
	2313	* @param srcChar the new code unit
	2314	* @return a reference to this
	2315	* @stable ICU 2.0
	2316	*/
	2317	inline UnicodeString& replace(int32_t start,
	2318	int32_t length,
	2319	UChar srcChar);
	2320
	2321	/**
	2322	* Replace the characters in the range
	2323	* [<TT>start</TT>, <TT>start + length</TT>) with the code point
	2324	* <TT>srcChar</TT>.
	2325	* @param start the offset at which the replace operation begins
	2326	* @param length the number of characters to replace. The character at
	2327	* <TT>start + length</TT> is not modified.
	2328	* @param srcChar the new code point
	2329	* @return a reference to this
	2330	* @stable ICU 2.0
	2331	*/
	2332	UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
	2333
	2334	/**
	2335	* Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
	2336	* with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
	2337	* @param start the offset at which the replace operation begins
	2338	* @param limit the offset immediately following the replace range
	2339	* @param srcText the source for the new characters
	2340	* @return a reference to this
	2341	* @stable ICU 2.0
	2342	*/
	2343	inline UnicodeString& replaceBetween(int32_t start,
	2344	int32_t limit,
	2345	const UnicodeString& srcText);
	2346
	2347	/**
	2348	* Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
	2349	* with the characters in <TT>srcText</TT> in the range
	2350	* [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
	2351	* @param start the offset at which the replace operation begins
	2352	* @param limit the offset immediately following the replace range
	2353	* @param srcText the source for the new characters
	2354	* @param srcStart the offset into <TT>srcText</TT> where new characters
	2355	* will be obtained
	2356	* @param srcLimit the offset immediately following the range to copy
	2357	* in <TT>srcText</TT>
	2358	* @return a reference to this
	2359	* @stable ICU 2.0
	2360	*/
	2361	inline UnicodeString& replaceBetween(int32_t start,
	2362	int32_t limit,
	2363	const UnicodeString& srcText,
	2364	int32_t srcStart,
	2365	int32_t srcLimit);
	2366
	2367	/**
	2368	* Replace a substring of this object with the given text.
	2369	* @param start the beginning index, inclusive; <code>0 <= start
	2370	* <= limit</code>.
	2371	* @param limit the ending index, exclusive; <code>start <= limit
	2372	* <= length()</code>.
	2373	* @param text the text to replace characters <code>start</code>
	2374	* to <code>limit - 1</code>
	2375	* @stable ICU 2.0
	2376	*/
	2377	virtual void handleReplaceBetween(int32_t start,
	2378	int32_t limit,
	2379	const UnicodeString& text);
	2380
	2381	/**
	2382	* Replaceable API
	2383	* @return TRUE if it has MetaData
	2384	* @stable ICU 2.4
	2385	*/
	2386	virtual UBool hasMetaData() const;
	2387
	2388	/**
	2389	* Copy a substring of this object, retaining attribute (out-of-band)
	2390	* information. This method is used to duplicate or reorder substrings.
	2391	* The destination index must not overlap the source range.
	2392	*
	2393	* @param start the beginning index, inclusive; <code>0 <= start <=
	2394	* limit</code>.
	2395	* @param limit the ending index, exclusive; <code>start <= limit <=
	2396	* length()</code>.
	2397	* @param dest the destination index. The characters from
	2398	* <code>start..limit-1</code> will be copied to <code>dest</code>.
	2399	* Implementations of this method may assume that <code>dest <= start ||
	2400	* dest >= limit</code>.
	2401	* @stable ICU 2.0
	2402	*/
	2403	virtual void copy(int32_t start, int32_t limit, int32_t dest);
	2404
	2405	/* Search and replace operations */
	2406
	2407	/**
	2408	* Replace all occurrences of characters in oldText with the characters
	2409	* in newText
	2410	* @param oldText the text containing the search text
	2411	* @param newText the text containing the replacement text
	2412	* @return a reference to this
	2413	* @stable ICU 2.0
	2414	*/
	2415	inline UnicodeString& findAndReplace(const UnicodeString& oldText,
	2416	const UnicodeString& newText);
	2417
	2418	/**
	2419	* Replace all occurrences of characters in oldText with characters
	2420	* in newText
	2421	* in the range [<TT>start</TT>, <TT>start + length</TT>).
	2422	* @param start the start of the range in which replace will be performed
	2423	* @param length the length of the range in which replace will be performed
	2424	* @param oldText the text containing the search text
	2425	* @param newText the text containing the replacement text
	2426	* @return a reference to this
	2427	* @stable ICU 2.0
	2428	*/
	2429	inline UnicodeString& findAndReplace(int32_t start,
	2430	int32_t length,
	2431	const UnicodeString& oldText,
	2432	const UnicodeString& newText);
	2433
	2434	/**
	2435	* Replace all occurrences of characters in oldText in the range
	2436	* [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
	2437	* in newText in the range
	2438	* [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
	2439	* in the range [<TT>start</TT>, <TT>start + length</TT>).
	2440	* @param start the start of the range in which replace will be performed
	2441	* @param length the length of the range in which replace will be performed
	2442	* @param oldText the text containing the search text
	2443	* @param oldStart the start of the search range in <TT>oldText</TT>
	2444	* @param oldLength the length of the search range in <TT>oldText</TT>
	2445	* @param newText the text containing the replacement text
	2446	* @param newStart the start of the replacement range in <TT>newText</TT>
	2447	* @param newLength the length of the replacement range in <TT>newText</TT>
	2448	* @return a reference to this
	2449	* @stable ICU 2.0
	2450	*/
	2451	UnicodeString& findAndReplace(int32_t start,
	2452	int32_t length,
	2453	const UnicodeString& oldText,
	2454	int32_t oldStart,
	2455	int32_t oldLength,
	2456	const UnicodeString& newText,
	2457	int32_t newStart,
	2458	int32_t newLength);
	2459
	2460
	2461	/* Remove operations */
	2462
	2463	/**
	2464	* Remove all characters from the UnicodeString object.
	2465	* @return a reference to this
	2466	* @stable ICU 2.0
	2467	*/
	2468	inline UnicodeString& remove(void);
	2469
	2470	/**
	2471	* Remove the characters in the range
	2472	* [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
	2473	* @param start the offset of the first character to remove
	2474	* @param length the number of characters to remove
	2475	* @return a reference to this
	2476	* @stable ICU 2.0
	2477	*/
	2478	inline UnicodeString& remove(int32_t start,
	2479	int32_t length = (int32_t)INT32_MAX);
	2480
	2481	/**
	2482	* Remove the characters in the range
	2483	* [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
	2484	* @param start the offset of the first character to remove
	2485	* @param limit the offset immediately following the range to remove
	2486	* @return a reference to this
	2487	* @stable ICU 2.0
	2488	*/
	2489	inline UnicodeString& removeBetween(int32_t start,
	2490	int32_t limit = (int32_t)INT32_MAX);
	2491
	2492	/**
	2493	* Retain only the characters in the range
	2494	* [<code>start</code>, <code>limit</code>) from the UnicodeString object.
	2495	* Removes characters before <code>start</code> and at and after <code>limit</code>.
	2496	* @param start the offset of the first character to retain
	2497	* @param limit the offset immediately following the range to retain
	2498	* @return a reference to this
	2499	* @stable ICU 4.4
	2500	*/
	2501	inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
	2502
	2503	/* Length operations */
	2504
	2505	/**
	2506	* Pad the start of this UnicodeString with the character <TT>padChar</TT>.
	2507	* If the length of this UnicodeString is less than targetLength,
	2508	* targetLength - length() copies of padChar will be added to the
	2509	* beginning of this UnicodeString.
	2510	* @param targetLength the desired length of the string
	2511	* @param padChar the character to use for padding. Defaults to
	2512	* space (U+0020)
	2513	* @return TRUE if the text was padded, FALSE otherwise.
	2514	* @stable ICU 2.0
	2515	*/
	2516	UBool padLeading(int32_t targetLength,
	2517	UChar padChar = 0x0020);
	2518
	2519	/**
	2520	* Pad the end of this UnicodeString with the character <TT>padChar</TT>.
	2521	* If the length of this UnicodeString is less than targetLength,
	2522	* targetLength - length() copies of padChar will be added to the
	2523	* end of this UnicodeString.
	2524	* @param targetLength the desired length of the string
	2525	* @param padChar the character to use for padding. Defaults to
	2526	* space (U+0020)
	2527	* @return TRUE if the text was padded, FALSE otherwise.
	2528	* @stable ICU 2.0
	2529	*/
	2530	UBool padTrailing(int32_t targetLength,
	2531	UChar padChar = 0x0020);
	2532
	2533	/**
	2534	* Truncate this UnicodeString to the <TT>targetLength</TT>.
	2535	* @param targetLength the desired length of this UnicodeString.
	2536	* @return TRUE if the text was truncated, FALSE otherwise
	2537	* @stable ICU 2.0
	2538	*/
	2539	inline UBool truncate(int32_t targetLength);
	2540
	2541	/**
	2542	* Trims leading and trailing whitespace from this UnicodeString.
	2543	* @return a reference to this
	2544	* @stable ICU 2.0
	2545	*/
	2546	UnicodeString& trim(void);
	2547
	2548
	2549	/* Miscellaneous operations */
	2550
	2551	/**
	2552	* Reverse this UnicodeString in place.
	2553	* @return a reference to this
	2554	* @stable ICU 2.0
	2555	*/
	2556	inline UnicodeString& reverse(void);
	2557
	2558	/**
	2559	* Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
	2560	* this UnicodeString.
	2561	* @param start the start of the range to reverse
	2562	* @param length the number of characters to reverse
	2563	* @return a reference to this
	2564	* @stable ICU 2.0
	2565	*/
	2566	inline UnicodeString& reverse(int32_t start,
	2567	int32_t length);
	2568
	2569	/**
	2570	* Convert the characters in this to UPPER CASE following the conventions of
	2571	* the default locale.
	2572	* @return A reference to this.
	2573	* @stable ICU 2.0
	2574	*/
	2575	UnicodeString& toUpper(void);
	2576
	2577	/**
	2578	* Convert the characters in this to UPPER CASE following the conventions of
	2579	* a specific locale.
	2580	* @param locale The locale containing the conventions to use.
	2581	* @return A reference to this.
	2582	* @stable ICU 2.0
	2583	*/
	2584	UnicodeString& toUpper(const Locale& locale);
	2585
	2586	/**
	2587	* Convert the characters in this to lower case following the conventions of
	2588	* the default locale.
	2589	* @return A reference to this.
	2590	* @stable ICU 2.0
	2591	*/
	2592	UnicodeString& toLower(void);
	2593
	2594	/**
	2595	* Convert the characters in this to lower case following the conventions of
	2596	* a specific locale.
	2597	* @param locale The locale containing the conventions to use.
	2598	* @return A reference to this.
	2599	* @stable ICU 2.0
	2600	*/
	2601	UnicodeString& toLower(const Locale& locale);
	2602
	2603	#if !UCONFIG_NO_BREAK_ITERATION
	2604
	2605	/**
	2606	* Titlecase this string, convenience function using the default locale.
	2607	*
	2608	* Casing is locale-dependent and context-sensitive.
	2609	* Titlecasing uses a break iterator to find the first characters of words
	2610	* that are to be titlecased. It titlecases those characters and lowercases
	2611	* all others.
	2612	*
	2613	* The titlecase break iterator can be provided to customize for arbitrary
	2614	* styles, using rules and dictionaries beyond the standard iterators.
	2615	* It may be more efficient to always provide an iterator to avoid
	2616	* opening and closing one for each string.
	2617	* The standard titlecase iterator for the root locale implements the
	2618	* algorithm of Unicode TR 21.
	2619	*
	2620	* This function uses only the setText(), first() and next() methods of the
	2621	* provided break iterator.
	2622	*
	2623	* @param titleIter A break iterator to find the first characters of words
	2624	* that are to be titlecased.
	2625	* If none is provided (0), then a standard titlecase
	2626	* break iterator is opened.
	2627	* Otherwise the provided iterator is set to the string's text.
	2628	* @return A reference to this.
	2629	* @stable ICU 2.1
	2630	*/
	2631	UnicodeString &toTitle(BreakIterator *titleIter);
	2632
	2633	/**
	2634	* Titlecase this string.
	2635	*
	2636	* Casing is locale-dependent and context-sensitive.
	2637	* Titlecasing uses a break iterator to find the first characters of words
	2638	* that are to be titlecased. It titlecases those characters and lowercases
	2639	* all others.
	2640	*
	2641	* The titlecase break iterator can be provided to customize for arbitrary
	2642	* styles, using rules and dictionaries beyond the standard iterators.
	2643	* It may be more efficient to always provide an iterator to avoid
	2644	* opening and closing one for each string.
	2645	* The standard titlecase iterator for the root locale implements the
	2646	* algorithm of Unicode TR 21.
	2647	*
	2648	* This function uses only the setText(), first() and next() methods of the
	2649	* provided break iterator.
	2650	*
	2651	* @param titleIter A break iterator to find the first characters of words
	2652	* that are to be titlecased.
	2653	* If none is provided (0), then a standard titlecase
	2654	* break iterator is opened.
	2655	* Otherwise the provided iterator is set to the string's text.
	2656	* @param locale The locale to consider.
	2657	* @return A reference to this.
	2658	* @stable ICU 2.1
	2659	*/
	2660	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
	2661
	2662	/**
	2663	* Titlecase this string, with options.
	2664	*
	2665	* Casing is locale-dependent and context-sensitive.
	2666	* Titlecasing uses a break iterator to find the first characters of words
	2667	* that are to be titlecased. It titlecases those characters and lowercases
	2668	* all others. (This can be modified with options.)
	2669	*
	2670	* The titlecase break iterator can be provided to customize for arbitrary
	2671	* styles, using rules and dictionaries beyond the standard iterators.
	2672	* It may be more efficient to always provide an iterator to avoid
	2673	* opening and closing one for each string.
	2674	* The standard titlecase iterator for the root locale implements the
	2675	* algorithm of Unicode TR 21.
	2676	*
	2677	* This function uses only the setText(), first() and next() methods of the
	2678	* provided break iterator.
	2679	*
	2680	* @param titleIter A break iterator to find the first characters of words
	2681	* that are to be titlecased.
	2682	* If none is provided (0), then a standard titlecase
	2683	* break iterator is opened.
	2684	* Otherwise the provided iterator is set to the string's text.
	2685	* @param locale The locale to consider.
	2686	* @param options Options bit set, see ucasemap_open().
	2687	* @return A reference to this.
	2688	* @see U_TITLECASE_NO_LOWERCASE
	2689	* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
	2690	* @see ucasemap_open
	2691	* @stable ICU 3.8
	2692	*/
	2693	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
	2694
	2695	#endif
	2696
	2697	/**
	2698	* Case-folds the characters in this string.
	2699	*
	2700	* Case-folding is locale-independent and not context-sensitive,
	2701	* but there is an option for whether to include or exclude mappings for dotted I
	2702	* and dotless i that are marked with 'T' in CaseFolding.txt.
	2703	*
	2704	* The result may be longer or shorter than the original.
	2705	*
	2706	* @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
	2707	* @return A reference to this.
	2708	* @stable ICU 2.0
	2709	*/
	2710	UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
	2711
	2712	//========================================
	2713	// Access to the internal buffer
	2714	//========================================
	2715
	2716	/**
	2717	* Get a read/write pointer to the internal buffer.
	2718	* The buffer is guaranteed to be large enough for at least minCapacity UChars,
	2719	* writable, and is still owned by the UnicodeString object.
	2720	* Calls to getBuffer(minCapacity) must not be nested, and
	2721	* must be matched with calls to releaseBuffer(newLength).
	2722	* If the string buffer was read-only or shared,
	2723	* then it will be reallocated and copied.
	2724	*
	2725	* An attempted nested call will return 0, and will not further modify the
	2726	* state of the UnicodeString object.
	2727	* It also returns 0 if the string is bogus.
	2728	*
	2729	* The actual capacity of the string buffer may be larger than minCapacity.
	2730	* getCapacity() returns the actual capacity.
	2731	* For many operations, the full capacity should be used to avoid reallocations.
	2732	*
	2733	* While the buffer is "open" between getBuffer(minCapacity)
	2734	* and releaseBuffer(newLength), the following applies:
	2735	* - The string length is set to 0.
	2736	* - Any read API call on the UnicodeString object will behave like on a 0-length string.
	2737	* - Any write API call on the UnicodeString object is disallowed and will have no effect.
	2738	* - You can read from and write to the returned buffer.
	2739	* - The previous string contents will still be in the buffer;
	2740	* if you want to use it, then you need to call length() before getBuffer(minCapacity).
	2741	* If the length() was greater than minCapacity, then any contents after minCapacity
	2742	* may be lost.
	2743	* The buffer contents is not NUL-terminated by getBuffer().
	2744	* If length()<getCapacity() then you can terminate it by writing a NUL
	2745	* at index length().
	2746	* - You must call releaseBuffer(newLength) before and in order to
	2747	* return to normal UnicodeString operation.
	2748	*
	2749	* @param minCapacity the minimum number of UChars that are to be available
	2750	* in the buffer, starting at the returned pointer;
	2751	* default to the current string capacity if minCapacity==-1
	2752	* @return a writable pointer to the internal string buffer,
	2753	* or 0 if an error occurs (nested calls, out of memory)
	2754	*
	2755	* @see releaseBuffer
	2756	* @see getTerminatedBuffer()
	2757	* @stable ICU 2.0
	2758	*/
	2759	UChar *getBuffer(int32_t minCapacity);
	2760
	2761	/**
	2762	* Release a read/write buffer on a UnicodeString object with an
	2763	* "open" getBuffer(minCapacity).
	2764	* This function must be called in a matched pair with getBuffer(minCapacity).
	2765	* releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
	2766	*
	2767	* It will set the string length to newLength, at most to the current capacity.
	2768	* If newLength==-1 then it will set the length according to the
	2769	* first NUL in the buffer, or to the capacity if there is no NUL.
	2770	*
	2771	* After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
	2772	*
	2773	* @param newLength the new length of the UnicodeString object;
	2774	* defaults to the current capacity if newLength is greater than that;
	2775	* if newLength==-1, it defaults to u_strlen(buffer) but not more than
	2776	* the current capacity of the string
	2777	*
	2778	* @see getBuffer(int32_t minCapacity)
	2779	* @stable ICU 2.0
	2780	*/
	2781	void releaseBuffer(int32_t newLength=-1);
	2782
	2783	/**
	2784	* Get a read-only pointer to the internal buffer.
	2785	* This can be called at any time on a valid UnicodeString.
	2786	*
	2787	* It returns 0 if the string is bogus, or
	2788	* during an "open" getBuffer(minCapacity).
	2789	*
	2790	* It can be called as many times as desired.
	2791	* The pointer that it returns will remain valid until the UnicodeString object is modified,
	2792	* at which time the pointer is semantically invalidated and must not be used any more.
	2793	*
	2794	* The capacity of the buffer can be determined with getCapacity().
	2795	* The part after length() may or may not be initialized and valid,
	2796	* depending on the history of the UnicodeString object.
	2797	*
	2798	* The buffer contents is (probably) not NUL-terminated.
	2799	* You can check if it is with
	2800	* <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
	2801	* (See getTerminatedBuffer().)
	2802	*
	2803	* The buffer may reside in read-only memory. Its contents must not
	2804	* be modified.
	2805	*
	2806	* @return a read-only pointer to the internal string buffer,
	2807	* or 0 if the string is empty or bogus
	2808	*
	2809	* @see getBuffer(int32_t minCapacity)
	2810	* @see getTerminatedBuffer()
	2811	* @stable ICU 2.0
	2812	*/
	2813	inline const UChar *getBuffer() const;
	2814
	2815	/**
	2816	* Get a read-only pointer to the internal buffer,
	2817	* making sure that it is NUL-terminated.
	2818	* This can be called at any time on a valid UnicodeString.
	2819	*
	2820	* It returns 0 if the string is bogus, or
	2821	* during an "open" getBuffer(minCapacity), or if the buffer cannot
	2822	* be NUL-terminated (because memory allocation failed).
	2823	*
	2824	* It can be called as many times as desired.
	2825	* The pointer that it returns will remain valid until the UnicodeString object is modified,
	2826	* at which time the pointer is semantically invalidated and must not be used any more.
	2827	*
	2828	* The capacity of the buffer can be determined with getCapacity().
	2829	* The part after length()+1 may or may not be initialized and valid,
	2830	* depending on the history of the UnicodeString object.
	2831	*
	2832	* The buffer contents is guaranteed to be NUL-terminated.
	2833	* getTerminatedBuffer() may reallocate the buffer if a terminating NUL
	2834	* is written.
	2835	* For this reason, this function is not const, unlike getBuffer().
	2836	* Note that a UnicodeString may also contain NUL characters as part of its contents.
	2837	*
	2838	* The buffer may reside in read-only memory. Its contents must not
	2839	* be modified.
	2840	*
	2841	* @return a read-only pointer to the internal string buffer,
	2842	* or 0 if the string is empty or bogus
	2843	*
	2844	* @see getBuffer(int32_t minCapacity)
	2845	* @see getBuffer()
	2846	* @stable ICU 2.2
	2847	*/
	2848	inline const UChar *getTerminatedBuffer();
	2849
	2850	//========================================
	2851	// Constructors
	2852	//========================================
	2853
	2854	/** Construct an empty UnicodeString.
	2855	* @stable ICU 2.0
	2856	*/
	2857	inline UnicodeString();
	2858
	2859	/**
	2860	* Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
	2861	* @param capacity the number of UChars this UnicodeString should hold
	2862	* before a resize is necessary; if count is greater than 0 and count
	2863	* code points c take up more space than capacity, then capacity is adjusted
	2864	* accordingly.
	2865	* @param c is used to initially fill the string
	2866	* @param count specifies how many code points c are to be written in the
	2867	* string
	2868	* @stable ICU 2.0
	2869	*/
	2870	UnicodeString(int32_t capacity, UChar32 c, int32_t count);
	2871
	2872	/**
	2873	* Single UChar (code unit) constructor.
	2874	*
	2875	* It is recommended to mark this constructor "explicit" by
	2876	* <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
	2877	* on the compiler command line or similar.
	2878	* @param ch the character to place in the UnicodeString
	2879	* @stable ICU 2.0
	2880	*/
	2881	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
	2882
	2883	/**
	2884	* Single UChar32 (code point) constructor.
	2885	*
	2886	* It is recommended to mark this constructor "explicit" by
	2887	* <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
	2888	* on the compiler command line or similar.
	2889	* @param ch the character to place in the UnicodeString
	2890	* @stable ICU 2.0
	2891	*/
	2892	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
	2893
	2894	/**
	2895	* UChar* constructor.
	2896	*
	2897	* It is recommended to mark this constructor "explicit" by
	2898	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
	2899	* on the compiler command line or similar.
	2900	* @param text The characters to place in the UnicodeString. <TT>text</TT>
	2901	* must be NUL (U+0000) terminated.
	2902	* @stable ICU 2.0
	2903	*/
	2904	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
	2905
	2906	/**
	2907	* UChar* constructor.
	2908	* @param text The characters to place in the UnicodeString.
	2909	* @param textLength The number of Unicode characters in <TT>text</TT>
	2910	* to copy.
	2911	* @stable ICU 2.0
	2912	*/
	2913	UnicodeString(const UChar *text,
	2914	int32_t textLength);
	2915
	2916	/**
	2917	* Readonly-aliasing UChar* constructor.
	2918	* The text will be used for the UnicodeString object, but
	2919	* it will not be released when the UnicodeString is destroyed.
	2920	* This has copy-on-write semantics:
	2921	* When the string is modified, then the buffer is first copied into
	2922	* newly allocated memory.
	2923	* The aliased buffer is never modified.
	2924	*
	2925	* In an assignment to another UnicodeString, when using the copy constructor
	2926	* or the assignment operator, the text will be copied.
	2927	* When using fastCopyFrom(), the text will be aliased again,
	2928	* so that both strings then alias the same readonly-text.
	2929	*
	2930	* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
	2931	* This must be true if <code>textLength==-1</code>.
	2932	* @param text The characters to alias for the UnicodeString.
	2933	* @param textLength The number of Unicode characters in <code>text</code> to alias.
	2934	* If -1, then this constructor will determine the length
	2935	* by calling <code>u_strlen()</code>.
	2936	* @stable ICU 2.0
	2937	*/
	2938	UnicodeString(UBool isTerminated,
	2939	const UChar *text,
	2940	int32_t textLength);
	2941
	2942	/**
	2943	* Writable-aliasing UChar* constructor.
	2944	* The text will be used for the UnicodeString object, but
	2945	* it will not be released when the UnicodeString is destroyed.
	2946	* This has write-through semantics:
	2947	* For as long as the capacity of the buffer is sufficient, write operations
	2948	* will directly affect the buffer. When more capacity is necessary, then
	2949	* a new buffer will be allocated and the contents copied as with regularly
	2950	* constructed strings.
	2951	* In an assignment to another UnicodeString, the buffer will be copied.
	2952	* The extract(UChar *dst) function detects whether the dst pointer is the same
	2953	* as the string buffer itself and will in this case not copy the contents.
	2954	*
	2955	* @param buffer The characters to alias for the UnicodeString.
	2956	* @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
	2957	* @param buffCapacity The size of <code>buffer</code> in UChars.
	2958	* @stable ICU 2.0
	2959	*/
	2960	UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
	2961
	2962	#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
	2963
	2964	/**
	2965	* char* constructor.
	2966	* Uses the default converter (and thus depends on the ICU conversion code)
	2967	* unless U_CHARSET_IS_UTF8 is set to 1.
	2968	*
	2969	* For ASCII (really "invariant character") strings it is more efficient to use
	2970	* the constructor that takes a US_INV (for its enum EInvariant).
	2971	* For ASCII (invariant-character) string literals, see UNICODE_STRING and
	2972	* UNICODE_STRING_SIMPLE.
	2973	*
	2974	* It is recommended to mark this constructor "explicit" by
	2975	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
	2976	* on the compiler command line or similar.
	2977	* @param codepageData an array of bytes, null-terminated,
	2978	* in the platform's default codepage.
	2979	* @stable ICU 2.0
	2980	* @see UNICODE_STRING
	2981	* @see UNICODE_STRING_SIMPLE
	2982	*/
	2983	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
	2984
	2985	/**
	2986	* char* constructor.
	2987	* Uses the default converter (and thus depends on the ICU conversion code)
	2988	* unless U_CHARSET_IS_UTF8 is set to 1.
	2989	* @param codepageData an array of bytes in the platform's default codepage.
	2990	* @param dataLength The number of bytes in <TT>codepageData</TT>.
	2991	* @stable ICU 2.0
	2992	*/
	2993	UnicodeString(const char *codepageData, int32_t dataLength);
	2994
	2995	#endif
	2996
	2997	#if !UCONFIG_NO_CONVERSION
	2998
	2999	/**
	3000	* char* constructor.
	3001	* @param codepageData an array of bytes, null-terminated
	3002	* @param codepage the encoding of <TT>codepageData</TT>. The special
	3003	* value 0 for <TT>codepage</TT> indicates that the text is in the
	3004	* platform's default codepage.
	3005	*
	3006	* If <code>codepage</code> is an empty string (<code>""</code>),
	3007	* then a simple conversion is performed on the codepage-invariant
	3008	* subset ("invariant characters") of the platform encoding. See utypes.h.
	3009	* Recommendation: For invariant-character strings use the constructor
	3010	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
	3011	* because it avoids object code dependencies of UnicodeString on
	3012	* the conversion code.
	3013	*
	3014	* @stable ICU 2.0
	3015	*/
	3016	UnicodeString(const char *codepageData, const char *codepage);
	3017
	3018	/**
	3019	* char* constructor.
	3020	* @param codepageData an array of bytes.
	3021	* @param dataLength The number of bytes in <TT>codepageData</TT>.
	3022	* @param codepage the encoding of <TT>codepageData</TT>. The special
	3023	* value 0 for <TT>codepage</TT> indicates that the text is in the
	3024	* platform's default codepage.
	3025	* If <code>codepage</code> is an empty string (<code>""</code>),
	3026	* then a simple conversion is performed on the codepage-invariant
	3027	* subset ("invariant characters") of the platform encoding. See utypes.h.
	3028	* Recommendation: For invariant-character strings use the constructor
	3029	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
	3030	* because it avoids object code dependencies of UnicodeString on
	3031	* the conversion code.
	3032	*
	3033	* @stable ICU 2.0
	3034	*/
	3035	UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
	3036
	3037	/**
	3038	* char * / UConverter constructor.
	3039	* This constructor uses an existing UConverter object to
	3040	* convert the codepage string to Unicode and construct a UnicodeString
	3041	* from that.
	3042	*
	3043	* The converter is reset at first.
	3044	* If the error code indicates a failure before this constructor is called,
	3045	* or if an error occurs during conversion or construction,
	3046	* then the string will be bogus.
	3047	*
	3048	* This function avoids the overhead of opening and closing a converter if
	3049	* multiple strings are constructed.
	3050	*
	3051	* @param src input codepage string
	3052	* @param srcLength length of the input string, can be -1 for NUL-terminated strings
	3053	* @param cnv converter object (ucnv_resetToUnicode() will be called),
	3054	* can be NULL for the default converter
	3055	* @param errorCode normal ICU error code
	3056	* @stable ICU 2.0
	3057	*/
	3058	UnicodeString(
	3059	const char *src, int32_t srcLength,
	3060	UConverter *cnv,
	3061	UErrorCode &errorCode);
	3062
	3063	#endif
	3064
	3065	/**
	3066	* Constructs a Unicode string from an invariant-character char * string.
	3067	* About invariant characters see utypes.h.
	3068	* This constructor has no runtime dependency on conversion code and is
	3069	* therefore recommended over ones taking a charset name string
	3070	* (where the empty string "" indicates invariant-character conversion).
	3071	*
	3072	* Use the macro US_INV as the third, signature-distinguishing parameter.
	3073	*
	3074	* For example:
	3075	* \code
	3076	* void fn(const char *s) {
	3077	* UnicodeString ustr(s, -1, US_INV);
	3078	* // use ustr ...
	3079	* }
	3080	* \endcode
	3081	*
	3082	* @param src String using only invariant characters.
	3083	* @param length Length of src, or -1 if NUL-terminated.
	3084	* @param inv Signature-distinguishing parameter, use US_INV.
	3085	*
	3086	* @see US_INV
	3087	* @stable ICU 3.2
	3088	*/
	3089	UnicodeString(const char *src, int32_t length, enum EInvariant inv);
	3090
	3091
	3092	/**
	3093	* Copy constructor.
	3094	* @param that The UnicodeString object to copy.
	3095	* @stable ICU 2.0
	3096	*/
	3097	UnicodeString(const UnicodeString& that);
	3098
	3099	/**
	3100	* 'Substring' constructor from tail of source string.
	3101	* @param src The UnicodeString object to copy.
	3102	* @param srcStart The offset into <tt>src</tt> at which to start copying.
	3103	* @stable ICU 2.2
	3104	*/
	3105	UnicodeString(const UnicodeString& src, int32_t srcStart);
	3106
	3107	/**
	3108	* 'Substring' constructor from subrange of source string.
	3109	* @param src The UnicodeString object to copy.
	3110	* @param srcStart The offset into <tt>src</tt> at which to start copying.
	3111	* @param srcLength The number of characters from <tt>src</tt> to copy.
	3112	* @stable ICU 2.2
	3113	*/
	3114	UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
	3115
	3116	/**
	3117	* Clone this object, an instance of a subclass of Replaceable.
	3118	* Clones can be used concurrently in multiple threads.
	3119	* If a subclass does not implement clone(), or if an error occurs,
	3120	* then NULL is returned.
	3121	* The clone functions in all subclasses return a pointer to a Replaceable
	3122	* because some compilers do not support covariant (same-as-this)
	3123	* return types; cast to the appropriate subclass if necessary.
	3124	* The caller must delete the clone.
	3125	*
	3126	* @return a clone of this object
	3127	*
	3128	* @see Replaceable::clone
	3129	* @see getDynamicClassID
	3130	* @stable ICU 2.6
	3131	*/
	3132	virtual Replaceable *clone() const;
	3133
	3134	/** Destructor.
	3135	* @stable ICU 2.0
	3136	*/
	3137	virtual ~UnicodeString();
	3138
	3139	/**
	3140	* Create a UnicodeString from a UTF-8 string.
	3141	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
	3142	* Calls u_strFromUTF8WithSub().
	3143	*
	3144	* @param utf8 UTF-8 input string.
	3145	* Note that a StringPiece can be implicitly constructed
	3146	* from a std::string or a NUL-terminated const char * string.
	3147	* @return A UnicodeString with equivalent UTF-16 contents.
	3148	* @see toUTF8
	3149	* @see toUTF8String
	3150	* @stable ICU 4.2
	3151	*/
	3152	static UnicodeString fromUTF8(const StringPiece &utf8);
	3153
	3154	/**
	3155	* Create a UnicodeString from a UTF-32 string.
	3156	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
	3157	* Calls u_strFromUTF32WithSub().
	3158	*
	3159	* @param utf32 UTF-32 input string. Must not be NULL.
	3160	* @param length Length of the input string, or -1 if NUL-terminated.
	3161	* @return A UnicodeString with equivalent UTF-16 contents.
	3162	* @see toUTF32
	3163	* @stable ICU 4.2
	3164	*/
	3165	static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
	3166
	3167	/* Miscellaneous operations */
	3168
	3169	/**
	3170	* Unescape a string of characters and return a string containing
	3171	* the result. The following escape sequences are recognized:
	3172	*
	3173	* \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
	3174	* \\Uhhhhhhhh 8 hex digits
	3175	* \\xhh 1-2 hex digits
	3176	* \\ooo 1-3 octal digits; o in [0-7]
	3177	* \\cX control-X; X is masked with 0x1F
	3178	*
	3179	* as well as the standard ANSI C escapes:
	3180	*
	3181	* \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
	3182	* \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
	3183	* \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
	3184	*
	3185	* Anything else following a backslash is generically escaped. For
	3186	* example, "[a\\-z]" returns "[a-z]".
	3187	*
	3188	* If an escape sequence is ill-formed, this method returns an empty
	3189	* string. An example of an ill-formed sequence is "\\u" followed by
	3190	* fewer than 4 hex digits.
	3191	*
	3192	* This function is similar to u_unescape() but not identical to it.
	3193	* The latter takes a source char*, so it does escape recognition
	3194	* and also invariant conversion.
	3195	*
	3196	* @return a string with backslash escapes interpreted, or an
	3197	* empty string on error.
	3198	* @see UnicodeString#unescapeAt()
	3199	* @see u_unescape()
	3200	* @see u_unescapeAt()
	3201	* @stable ICU 2.0
	3202	*/
	3203	UnicodeString unescape() const;
	3204
	3205	/**
	3206	* Unescape a single escape sequence and return the represented
	3207	* character. See unescape() for a listing of the recognized escape
	3208	* sequences. The character at offset-1 is assumed (without
	3209	* checking) to be a backslash. If the escape sequence is
	3210	* ill-formed, or the offset is out of range, U_SENTINEL=-1 is
	3211	* returned.
	3212	*
	3213	* @param offset an input output parameter. On input, it is the
	3214	* offset into this string where the escape sequence is located,
	3215	* after the initial backslash. On output, it is advanced after the
	3216	* last character parsed. On error, it is not advanced at all.
	3217	* @return the character represented by the escape sequence at
	3218	* offset, or U_SENTINEL=-1 on error.
	3219	* @see UnicodeString#unescape()
	3220	* @see u_unescape()
	3221	* @see u_unescapeAt()
	3222	* @stable ICU 2.0
	3223	*/
	3224	UChar32 unescapeAt(int32_t &offset) const;
	3225
	3226	/**
	3227	* ICU "poor man's RTTI", returns a UClassID for this class.
	3228	*
	3229	* @stable ICU 2.2
	3230	*/
	3231	static UClassID U_EXPORT2 getStaticClassID();
	3232
	3233	/**
	3234	* ICU "poor man's RTTI", returns a UClassID for the actual class.
	3235	*
	3236	* @stable ICU 2.2
	3237	*/
	3238	virtual UClassID getDynamicClassID() const;
	3239
	3240	//========================================
	3241	// Implementation methods
	3242	//========================================
	3243
	3244	protected:
	3245	/**
	3246	* Implement Replaceable::getLength() (see jitterbug 1027).
	3247	* @stable ICU 2.4
	3248	*/
	3249	virtual int32_t getLength() const;
	3250
	3251	/**
	3252	* The change in Replaceable to use virtual getCharAt() allows
	3253	* UnicodeString::charAt() to be inline again (see jitterbug 709).
	3254	* @stable ICU 2.4
	3255	*/
	3256	virtual UChar getCharAt(int32_t offset) const;
	3257
	3258	/**
	3259	* The change in Replaceable to use virtual getChar32At() allows
	3260	* UnicodeString::char32At() to be inline again (see jitterbug 709).
	3261	* @stable ICU 2.4
	3262	*/
	3263	virtual UChar32 getChar32At(int32_t offset) const;
	3264
	3265	private:
	3266	// For char* constructors. Could be made public.
	3267	UnicodeString &setToUTF8(const StringPiece &utf8);
	3268	// For extract(char*).
	3269	// We could make a toUTF8(target, capacity, errorCode) public but not
	3270	// this version: New API will be cleaner if we make callers create substrings
	3271	// rather than having start+length on every method,
	3272	// and it should take a UErrorCode&.
	3273	int32_t
	3274	toUTF8(int32_t start, int32_t len,
	3275	char *target, int32_t capacity) const;
	3276
	3277	/**
	3278	* Internal string contents comparison, called by operator==.
	3279	* Requires: this & text not bogus and have same lengths.
	3280	*/
	3281	UBool doEquals(const UnicodeString &text, int32_t len) const;
	3282
	3283	inline int8_t
	3284	doCompare(int32_t start,
	3285	int32_t length,
	3286	const UnicodeString& srcText,
	3287	int32_t srcStart,
	3288	int32_t srcLength) const;
	3289
	3290	int8_t doCompare(int32_t start,
	3291	int32_t length,
	3292	const UChar *srcChars,
	3293	int32_t srcStart,
	3294	int32_t srcLength) const;
	3295
	3296	inline int8_t
	3297	doCompareCodePointOrder(int32_t start,
	3298	int32_t length,
	3299	const UnicodeString& srcText,
	3300	int32_t srcStart,
	3301	int32_t srcLength) const;
	3302
	3303	int8_t doCompareCodePointOrder(int32_t start,
	3304	int32_t length,
	3305	const UChar *srcChars,
	3306	int32_t srcStart,
	3307	int32_t srcLength) const;
	3308
	3309	inline int8_t
	3310	doCaseCompare(int32_t start,
	3311	int32_t length,
	3312	const UnicodeString &srcText,
	3313	int32_t srcStart,
	3314	int32_t srcLength,
	3315	uint32_t options) const;
	3316
	3317	int8_t
	3318	doCaseCompare(int32_t start,
	3319	int32_t length,
	3320	const UChar *srcChars,
	3321	int32_t srcStart,
	3322	int32_t srcLength,
	3323	uint32_t options) const;
	3324
	3325	int32_t doIndexOf(UChar c,
	3326	int32_t start,
	3327	int32_t length) const;
	3328
	3329	int32_t doIndexOf(UChar32 c,
	3330	int32_t start,
	3331	int32_t length) const;
	3332
	3333	int32_t doLastIndexOf(UChar c,
	3334	int32_t start,
	3335	int32_t length) const;
	3336
	3337	int32_t doLastIndexOf(UChar32 c,
	3338	int32_t start,
	3339	int32_t length) const;
	3340
	3341	void doExtract(int32_t start,
	3342	int32_t length,
	3343	UChar *dst,
	3344	int32_t dstStart) const;
	3345
	3346	inline void doExtract(int32_t start,
	3347	int32_t length,
	3348	UnicodeString& target) const;
	3349
	3350	inline UChar doCharAt(int32_t offset) const;
	3351
	3352	UnicodeString& doReplace(int32_t start,
	3353	int32_t length,
	3354	const UnicodeString& srcText,
	3355	int32_t srcStart,
	3356	int32_t srcLength);
	3357
	3358	UnicodeString& doReplace(int32_t start,
	3359	int32_t length,
	3360	const UChar *srcChars,
	3361	int32_t srcStart,
	3362	int32_t srcLength);
	3363
	3364	UnicodeString& doReverse(int32_t start,
	3365	int32_t length);
	3366
	3367	// calculate hash code
	3368	int32_t doHashCode(void) const;
	3369
	3370	// get pointer to start of array
	3371	// these do not check for kOpenGetBuffer, unlike the public getBuffer() function
	3372	inline UChar* getArrayStart(void);
	3373	inline const UChar* getArrayStart(void) const;
	3374
	3375	// A UnicodeString object (not necessarily its current buffer)
	3376	// is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
	3377	inline UBool isWritable() const;
	3378
	3379	// Is the current buffer writable?
	3380	inline UBool isBufferWritable() const;
	3381
	3382	// None of the following does releaseArray().
	3383	inline void setLength(int32_t len); // sets only fShortLength and fLength
	3384	inline void setToEmpty(); // sets fFlags=kShortString
	3385	inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
	3386
	3387	// allocate the array; result may be fStackBuffer
	3388	// sets refCount to 1 if appropriate
	3389	// sets fArray, fCapacity, and fFlags
	3390	// returns boolean for success or failure
	3391	UBool allocate(int32_t capacity);
	3392
	3393	// release the array if owned
	3394	void releaseArray(void);
	3395
	3396	// turn a bogus string into an empty one
	3397	void unBogus();
	3398
	3399	// implements assignment operator, copy constructor, and fastCopyFrom()
	3400	UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
	3401
	3402	// Pin start and limit to acceptable values.
	3403	inline void pinIndex(int32_t& start) const;
	3404	inline void pinIndices(int32_t& start,
	3405	int32_t& length) const;
	3406
	3407	#if !UCONFIG_NO_CONVERSION
	3408
	3409	/* Internal extract() using UConverter. */
	3410	int32_t doExtract(int32_t start, int32_t length,
	3411	char *dest, int32_t destCapacity,
	3412	UConverter *cnv,
	3413	UErrorCode &errorCode) const;
	3414
	3415	/*
	3416	* Real constructor for converting from codepage data.
	3417	* It assumes that it is called with !fRefCounted.
	3418	*
	3419	* If <code>codepage==0</code>, then the default converter
	3420	* is used for the platform encoding.
	3421	* If <code>codepage</code> is an empty string (<code>""</code>),
	3422	* then a simple conversion is performed on the codepage-invariant
	3423	* subset ("invariant characters") of the platform encoding. See utypes.h.
	3424	*/
	3425	void doCodepageCreate(const char *codepageData,
	3426	int32_t dataLength,
	3427	const char *codepage);
	3428
	3429	/*
	3430	* Worker function for creating a UnicodeString from
	3431	* a codepage string using a UConverter.
	3432	*/
	3433	void
	3434	doCodepageCreate(const char *codepageData,
	3435	int32_t dataLength,
	3436	UConverter *converter,
	3437	UErrorCode &status);
	3438
	3439	#endif
	3440
	3441	/*
	3442	* This function is called when write access to the array
	3443	* is necessary.
	3444	*
	3445	* We need to make a copy of the array if
	3446	* the buffer is read-only, or
	3447	* the buffer is refCounted (shared), and refCount>1, or
	3448	* the buffer is too small.
	3449	*
	3450	* Return FALSE if memory could not be allocated.
	3451	*/
	3452	UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
	3453	int32_t growCapacity = -1,
	3454	UBool doCopyArray = TRUE,
	3455	int32_t **pBufferToDelete = 0,
	3456	UBool forceClone = FALSE);
	3457
	3458	/**
	3459	* Common function for UnicodeString case mappings.
	3460	* The stringCaseMapper has the same type UStringCaseMapper
	3461	* as in ustr_imp.h for ustrcase_map().
	3462	*/
	3463	UnicodeString &
	3464	caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
	3465
	3466	// ref counting
	3467	void addRef(void);
	3468	int32_t removeRef(void);
	3469	int32_t refCount(void) const;
	3470
	3471	// constants
	3472	enum { // private constants: buffer sizing, sentinel values, and the bits stored in fFlags
	3473	// Set the stack buffer size so that sizeof(UnicodeString) is,
	3474	// naturally (without padding), a multiple of sizeof(pointer).
	3475	US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // short-string buffer size in UChars: 13 with 4-byte pointers, otherwise 15
	3476	kInvalidUChar=0xffff, // invalid UChar index
	3477	kGrowSize=128, // grow size for this buffer
	3478	kInvalidHashCode=0, // invalid hash code
	3479	kEmptyHashCode=1, // hash code for empty string
	3480
	3481	// bit flag values for fFlags (each is a distinct power of two, so they can be OR-ed together)
	3482	kIsBogus=1, // this string is bogus, i.e., not valid or NULL
	3483	kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
	3484	kRefCounted=4, // there is a refCount field before the characters in fArray
	3485	kBufferIsReadonly=8,// do not write to this buffer
	3486	kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
	3487	// and releaseBuffer(newLength) must be called
	3488
	3489	// combined values for convenience
	3490	kShortString=kUsingStackBuffer, // same bit as kUsingStackBuffer
	3491	kLongString=kRefCounted, // same bit as kRefCounted
	3492	kReadonlyAlias=kBufferIsReadonly, // same bit as kBufferIsReadonly
	3493	kWritableAlias=0 // no flag bits set
	3494	};
	3495
	3496	friend class StringThreadTest;
	3497	friend class UnicodeStringAppendable;
	3498
	3499	union StackBufferOrFields; // forward declaration necessary before friend declaration
	3500	friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
	3501
	3502	/*
	3503	* The following are all the class fields that are stored
	3504	* in each UnicodeString object.
	3505	* Note that UnicodeString has virtual functions,
	3506	* therefore there is an implicit vtable pointer
	3507	* as the first real field.
	3508	* The fields should be aligned such that no padding is necessary.
	3509	* On 32-bit machines, the size should be 32 bytes,
	3510	* on 64-bit machines (8-byte pointers), it should be 40 bytes.
	3511	*
	3512	* We use a hack to achieve this.
	3513	*
	3514	* With at least some compilers, each of the following is forced to
	3515	* a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
	3516	* rounded up with additional padding if the fields do not already fit that requirement:
	3517	* - sizeof(class UnicodeString)
	3518	* - offsetof(UnicodeString, fUnion)
	3519	* - sizeof(fUnion)
	3520	* - sizeof(fFields)
	3521	*
	3522	* In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
	3523	* which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
	3524	* (Padding at the end of fFields is ok:
	3525	* As long as there is no padding after fStackBuffer, it is not wasted space.)
	3526	*
	3527	* We further assume that the compiler does not reorder the fields,
	3528	* so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
	3529	* with at most some padding (but no other field) in between.
	3530	* (Padding there would be wasted space, but functionally harmless.)
	3531	*
	3532	* We use a few more sizeof(pointer)'s chunks of space with
	3533	* fRestOfStackBuffer, fShortLength and fFlags,
	3534	* to get up exactly to the intended sizeof(UnicodeString).
	3535	*/
	3536	// (implicit) *vtable;
	3537	union StackBufferOrFields { // overlays the short-string buffer with the fields used otherwise
	3538	// fStackBuffer is used iff (fFlags&kUsingStackBuffer)
	3539	// else fFields is used
	3540	UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer
	3541	struct {
	3542	UChar *fArray; // the Unicode data
	3543	int32_t fCapacity; // capacity of fArray (in UChars)
	3544	int32_t fLength; // number of characters in fArray if >127; else undefined (length() reads it only when fShortLength<0)
	3545	} fFields;
	3546	} fUnion;
	3547	UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8]; // the UChars of the short-string buffer beyond the 8 in fStackBuffer
	3548	int8_t fShortLength; // 0..127: the length itself; <0: the real length is in fUnion.fFields.fLength
	3549	uint8_t fFlags; // bit flags: see constants above
	3550	};
	3551
	3552	/**
	3553	* Create a new UnicodeString with the concatenation of two others.
	3554	*
	3555	* @param s1 The first string to be copied to the new one.
	3556	* @param s2 The second string to be copied to the new one, after s1.
	3557	* @return UnicodeString(s1).append(s2)
	3558	* @stable ICU 2.8
	3559	*/
	3560	U_COMMON_API UnicodeString U_EXPORT2
	3561	operator+ (const UnicodeString &s1, const UnicodeString &s2);
	3562
	3563	//========================================
	3564	// Inline members
	3565	//========================================
	3566
	3567	//========================================
	3568	// Privates
	3569	//========================================
	3570
	3571	// Clamp a single offset into the range [0, length()].
	3572	inline void UnicodeString::pinIndex(int32_t& start) const
	3573	{
	3574	    const int32_t len = length();
	3575	    if(start < 0) {
	3576	        start = 0;
	3577	    } else if(start > len) {
	3578	        start = len;
	3579	    }
	3580	}
	3581
	3582	// Clamp a (start, length) pair so that it denotes a range inside this string.
	3583	inline void UnicodeString::pinIndices(int32_t& start, int32_t& _length) const
	3584	{
	3585	    const int32_t len = length();
	3586	    if(start < 0) {
	3587	        start = 0;
	3588	    } else if(start > len) {
	3589	        start = len;
	3590	    }
	3591	    // With start pinned, at most (len - start) units remain for the length.
	3592	    const int32_t remaining = len - start;
	3593	    if(_length < 0) {
	3594	        _length = 0;
	3595	    } else if(_length > remaining) {
	3596	        _length = remaining;
	3597	    }
	3598	}
	3599
	3600	// First UChar of the text: fStackBuffer when kUsingStackBuffer is set, otherwise fFields.fArray.
	3601	inline UChar* UnicodeString::getArrayStart()
	3602	{ if(fFlags&kUsingStackBuffer) { return fUnion.fStackBuffer; } return fUnion.fFields.fArray; }
	3603
	3604	// Const flavor: same buffer selection as the non-const getArrayStart(), read-only result.
	3605	inline const UChar* UnicodeString::getArrayStart() const
	3606	{ if(fFlags&kUsingStackBuffer) { return fUnion.fStackBuffer; } return fUnion.fFields.fArray; }
	3607
	3608	//========================================
	3609	// Default constructor
	3610	//========================================
	3611
	3612	// Default constructor: length 0 with the kShortString flag (i.e. kUsingStackBuffer).
	3613	inline UnicodeString::UnicodeString()
	3614	    : fShortLength(0), fFlags(kShortString)
	3615	{
	3616	}
	3617
	3618	//========================================
	3619	// Read-only implementation methods
	3620	//========================================
	3621	// Length in UChars: fShortLength when it is non-negative, otherwise the fFields.fLength field.
	3622	inline int32_t UnicodeString::length() const
	3623	{ if(fShortLength<0) { return fUnion.fFields.fLength; } return fShortLength; }
	3624
	3625	// Capacity in UChars: US_STACKBUF_SIZE while the stack buffer is in use, else the stored fCapacity.
	3626	inline int32_t UnicodeString::getCapacity() const
	3627	{ if(fFlags&kUsingStackBuffer) { return US_STACKBUF_SIZE; } return fUnion.fFields.fCapacity; }
	3628
	3629	inline int32_t UnicodeString::hashCode() const {
	3630	    return doHashCode();  // the private helper does the actual hashing
	3631	}
	3632
	3633	inline UBool UnicodeString::isBogus() const {
	3634	    return (UBool)(fFlags & kIsBogus);  // kIsBogus is the value 1, so this yields 0 or 1
	3635	}
	3636
	3637	// Object-level writability: TRUE unless the string is bogus or has an open getBuffer(minCapacity).
	3638	inline UBool UnicodeString::isWritable() const
	3639	{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
	3640
	3641	// Buffer-level writability: TRUE only if none of kOpenGetBuffer, kIsBogus and
	3642	// kBufferIsReadonly is set, and the buffer is either not ref-counted or has refCount()==1.
	3643	inline UBool UnicodeString::isBufferWritable() const {
	3644	    return (UBool)(
	3645	        !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
	3646	        (!(fFlags&kRefCounted) || refCount()==1));
	3647	}
	3648
	3649	// Read-only access to the text. Returns 0 while the string is bogus or while a
	3650	// getBuffer(minCapacity) is still open; otherwise the stack buffer or fFields.fArray.
	3651	inline const UChar *UnicodeString::getBuffer() const {
	3652	    if(fFlags&(kIsBogus|kOpenGetBuffer)) {
	3653	        return 0;
	3654	    } else if(fFlags&kUsingStackBuffer) {
	3655	        return fUnion.fStackBuffer;
	3656	    }
	3657	    return fUnion.fFields.fArray;
	3658	}
	3659
	3660	//========================================
	3661	// Read-only alias methods
	3662	//========================================
	3663	// Compare a range of this string with a range of srcText by delegating to the UChar* overload.
	3664	inline int8_t UnicodeString::doCompare(int32_t start,
	3665	        int32_t thisLength,
	3666	        const UnicodeString& srcText,
	3667	        int32_t srcStart,
	3668	        int32_t srcLength) const
	3669	{
	3670	    if(!srcText.isBogus()) {
	3671	        // Clamp the source range to srcText before handing out its raw array.
	3672	        srcText.pinIndices(srcStart, srcLength);
	3673	        return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
	3674	    }
	3675	    return (int8_t)!isBogus(); // bogus source: 0 if this is bogus too, 1 otherwise
	3676	}
	3677
	3678	// Two bogus strings are equal; otherwise both must be non-bogus, equally long, and pass doEquals().
	3679	inline UBool UnicodeString::operator== (const UnicodeString& text) const
	3680	{
	3681	    if(isBogus()) {
	3682	        return text.isBogus();
	3683	    }
	3684	    const int32_t len = length();
	3685	    const int32_t textLength = text.length();
	3686	    return !text.isBogus() && len == textLength && doEquals(text, len);
	3687	}
	3688
	3689	inline UBool UnicodeString::operator!= (const UnicodeString& text) const {
	3690	    return (! operator==(text));  // exact negation of operator==
	3691	}
	3692
	3693	inline UBool UnicodeString::operator> (const UnicodeString& text) const {
	3694	    return doCompare(0, length(), text, 0, text.length()) == 1;  // TRUE only for a result of exactly 1
	3695	}
	3696
	3697	inline UBool UnicodeString::operator< (const UnicodeString& text) const {
	3698	    return doCompare(0, length(), text, 0, text.length()) == -1;  // TRUE only for a result of exactly -1
	3699	}
	3700
	3701	inline UBool UnicodeString::operator>= (const UnicodeString& text) const {
	3702	    return doCompare(0, length(), text, 0, text.length()) != -1;  // any result other than -1
	3703	}
	3704
	3705	inline UBool UnicodeString::operator<= (const UnicodeString& text) const {
	3706	    return doCompare(0, length(), text, 0, text.length()) != 1;  // any result other than 1
	3707	}
	3708
	3709	inline int8_t UnicodeString::compare(const UnicodeString& text) const {
	3710	    return doCompare(0, length(), text, 0, text.length());  // all of this string vs. all of text
	3711	}
	3712
	3713	// Forwards (start, _length) of this string and all of srcText (0 .. srcText.length()) to doCompare().
	3714	inline int8_t UnicodeString::compare(int32_t start, int32_t _length,
	3715	        const UnicodeString& srcText) const {
	3716	    return doCompare(start, _length, srcText, 0, srcText.length());
	3717	}
	3718
	3719	// Forwards all of this string and srcChars (from index 0, srcLength long) to doCompare().
	3720	inline int8_t UnicodeString::compare(const UChar *srcChars, int32_t srcLength) const {
	3721	    return doCompare(0, length(), srcChars, 0, srcLength);
	3722	}
	3723
	3724	// Range-vs-range comparison; every argument is handed unchanged to the
	3725	// UnicodeString overload of doCompare().
	3726	inline int8_t UnicodeString::compare(int32_t start, int32_t _length,
	3727	        const UnicodeString& srcText,
	3728	        int32_t srcStart, int32_t srcLength) const {
	3729	    return doCompare(start, _length, srcText, srcStart, srcLength);
	3730	}
	3731
	3732	// Forwards to doCompare(); note that _length is passed as the length of srcChars as well.
	3733	inline int8_t UnicodeString::compare(int32_t start, int32_t _length,
	3734	        const UChar *srcChars) const {
	3735	    return doCompare(start, _length, srcChars, 0, _length);
	3736	}
	3737
	3738	// Range-vs-range comparison against a UChar array; every argument is handed
	3739	// unchanged to the UChar* overload of doCompare().
	3740	inline int8_t UnicodeString::compare(int32_t start, int32_t _length,
	3741	        const UChar *srcChars,
	3742	        int32_t srcStart, int32_t srcLength) const {
	3743	    return doCompare(start, _length, srcChars, srcStart, srcLength);
	3744	}
	3745
	3746	// Start/limit flavor of compare(): each (start, limit) pair is converted to a
	3747	// (start, length) pair before calling doCompare().
	3748	inline int8_t UnicodeString::compareBetween(int32_t start, int32_t limit,
	3749	        const UnicodeString& srcText,
	3750	        int32_t srcStart, int32_t srcLimit) const {
	3751	    const int32_t thisLen = limit - start;
	3752	    return doCompare(start, thisLen, srcText, srcStart, srcLimit - srcStart);
	3753	}
	3754
	3755	// UnicodeString flavor of doCompareCodePointOrder(); delegates to the UChar* overload.
	3756	inline int8_t UnicodeString::doCompareCodePointOrder(int32_t start,
	3757	        int32_t thisLength,
	3758	        const UnicodeString& srcText,
	3759	        int32_t srcStart,
	3760	        int32_t srcLength) const
	3761	{
	3762	    if(!srcText.isBogus()) {
	3763	        // Clamp the source range to srcText before handing out its raw array.
	3764	        srcText.pinIndices(srcStart, srcLength);
	3765	        return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
	3766	    }
	3767	    return (int8_t)!isBogus(); // bogus source: 0 if this is bogus too, 1 otherwise
	3768	}
	3769
	3770	inline int8_t UnicodeString::compareCodePointOrder(const UnicodeString& text) const {
	3771	    return doCompareCodePointOrder(0, length(), text, 0, text.length());  // whole string vs. whole text
	3772	}
	3773
	3774	// Forwards (start, _length) of this string and all of srcText to doCompareCodePointOrder().
	3775	inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
	3776	        const UnicodeString& srcText) const {
	3777	    return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length());
	3778	}
	3779
	3780	// Forwards all of this string and srcChars (from index 0, srcLength long) to doCompareCodePointOrder().
	3781	inline int8_t UnicodeString::compareCodePointOrder(const UChar *srcChars, int32_t srcLength) const {
	3782	    return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength);
	3783	}
	3784
	3785	// Range-vs-range comparison; every argument is handed unchanged to the
	3786	// UnicodeString overload of doCompareCodePointOrder().
	3787	inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
	3788	        const UnicodeString& srcText,
	3789	        int32_t srcStart, int32_t srcLength) const {
	3790	    return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength);
	3791	}
	3792
	3793	// Forwards to doCompareCodePointOrder(); _length is passed as the length of srcChars as well.
	3794	inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
	3795	        const UChar *srcChars) const {
	3796	    return doCompareCodePointOrder(start, _length, srcChars, 0, _length);
	3797	}
	3798
	3799	// Range-vs-range comparison against a UChar array; every argument is handed
	3800	// unchanged to the UChar* overload of doCompareCodePointOrder().
	3801	inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
	3802	        const UChar *srcChars,
	3803	        int32_t srcStart, int32_t srcLength) const {
	3804	    return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength);
	3805	}
	3806
	3807	// Start/limit flavor: each (start, limit) pair is converted to a (start, length)
	3808	// pair before calling doCompareCodePointOrder().
	3809	inline int8_t UnicodeString::compareCodePointOrderBetween(int32_t start, int32_t limit,
	3810	        const UnicodeString& srcText,
	3811	        int32_t srcStart, int32_t srcLimit) const {
	3812	    const int32_t thisLen = limit - start;
	3813	    return doCompareCodePointOrder(start, thisLen, srcText, srcStart, srcLimit - srcStart);
	3814	}
	3815
	3816	// UnicodeString flavor of doCaseCompare(); delegates to the UChar* overload, passing options along.
	3817	inline int8_t UnicodeString::doCaseCompare(int32_t start,
	3818	        int32_t thisLength,
	3819	        const UnicodeString &srcText,
	3820	        int32_t srcStart,
	3821	        int32_t srcLength,
	3822	        uint32_t options) const
	3823	{
	3824	    if(!srcText.isBogus()) {
	3825	        // Clamp the source range to srcText before handing out its raw array.
	3826	        srcText.pinIndices(srcStart, srcLength);
	3827	        return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
	3828	    }
	3829	    return (int8_t)!isBogus(); // bogus source: 0 if this is bogus too, 1 otherwise
	3830	}
	3831
	3832	// Compares all of this string with all of text through doCaseCompare();
	3833	// options is forwarded unchanged.
	3834	inline int8_t UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const
	3835	{ return doCaseCompare(0, length(), text, 0, text.length(), options); }
	3836
	3837	// Forwards (start, _length) of this string and all of srcText to doCaseCompare();
	3838	// options is forwarded unchanged.
	3839	inline int8_t UnicodeString::caseCompare(int32_t start,
	3840	        int32_t _length,
	3841	        const UnicodeString &srcText,
	3842	        uint32_t options) const
	3843	{ return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); }
	3844
	3845	// Forwards all of this string and srcChars (from index 0, srcLength long) to
	3846	// doCaseCompare(); options is forwarded unchanged.
	3847	inline int8_t UnicodeString::caseCompare(const UChar *srcChars,
	3848	        int32_t srcLength,
	3849	        uint32_t options) const
	3850	{ return doCaseCompare(0, length(), srcChars, 0, srcLength, options); }
	3851
	3852	// Range-vs-range comparison; every argument is handed unchanged to the
	3853	// UnicodeString overload of doCaseCompare().
	3854	inline int8_t UnicodeString::caseCompare(int32_t start,
	3855	        int32_t _length,
	3856	        const UnicodeString &srcText,
	3857	        int32_t srcStart,
	3858	        int32_t srcLength,
	3859	        uint32_t options) const
	3860	{ return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); }
	3861
	3862	// Forwards to doCaseCompare(); _length is passed as the length of srcChars as well,
	3863	// and options is forwarded unchanged.
	3864	inline int8_t UnicodeString::caseCompare(int32_t start,
	3865	        int32_t _length,
	3866	        const UChar *srcChars,
	3867	        uint32_t options) const
	3868	{ return doCaseCompare(start, _length, srcChars, 0, _length, options); }
	3869
	3870	// Range-vs-range comparison against a UChar array; every argument is handed
	3871	// unchanged to the UChar* overload of doCaseCompare().
	3872	inline int8_t UnicodeString::caseCompare(int32_t start,
	3873	        int32_t _length,
	3874	        const UChar *srcChars,
	3875	        int32_t srcStart,
	3876	        int32_t srcLength,
	3877	        uint32_t options) const
	3878	{ return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); }
	3879
	3880	// Start/limit flavor: each (start, limit) pair becomes a (start, length) pair for doCaseCompare().
	3881	inline int8_t UnicodeString::caseCompareBetween(int32_t start, int32_t limit,
	3882	        const UnicodeString &srcText,
	3883	        int32_t srcStart, int32_t srcLimit,
	3884	        uint32_t options) const
	3885	{
	3886	    const int32_t thisLen = limit - start;
	3887	    return doCaseCompare(start, thisLen, srcText, srcStart, srcLimit - srcStart, options);
	3888	}
	3889
	3890	// Delegates to the UChar* overload of indexOf() using srcText's array.
	3891	// Returns -1 if srcText is bogus or if the source range is empty after pinning.
	3892	inline int32_t UnicodeString::indexOf(const UnicodeString& srcText,
	3893	        int32_t srcStart, int32_t srcLength,
	3894	        int32_t start, int32_t _length) const
	3895	{
	3896	    if(srcText.isBogus()) {
	3897	        return -1;
	3898	    }
	3899	    srcText.pinIndices(srcStart, srcLength);
	3900	    if(srcLength <= 0) {
	3901	        return -1;
	3902	    }
	3903	    return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
	3904	}
	3905
	3906	inline int32_t UnicodeString::indexOf(const UnicodeString& text) const {
	3907	    return indexOf(text, 0, text.length(), 0, length());  // all of text within all of this string
	3908	}
	3909
	3910	// Searches from start to the end; start is pinned first, so length() - start is not negative.
	3911	inline int32_t UnicodeString::indexOf(const UnicodeString& text, int32_t start) const
	3912	{
	3913	    pinIndex(start);
	3914	    return indexOf(text, 0, text.length(), start, length() - start);
	3915	}
	3916
	3917	// Passes all of text (0 .. text.length()) plus the given range to the 5-argument overload.
	3918	inline int32_t UnicodeString::indexOf(const UnicodeString& text,
	3919	        int32_t start, int32_t _length) const {
	3920	    return indexOf(text, 0, text.length(), start, _length);
	3921	}
	3922
	3923	// Searches from start to the end; start is pinned first, so length() - start is not negative.
	3924	inline int32_t UnicodeString::indexOf(const UChar *srcChars, int32_t srcLength,
	3925	        int32_t start) const
	3926	{
	3927	    pinIndex(start);
	3928	    return indexOf(srcChars, 0, srcLength, start, length() - start);
	3929	}
	3930
	3931	// Passes srcChars from index 0 plus the given range to the 5-argument UChar* overload.
	3932	inline int32_t UnicodeString::indexOf(const UChar *srcChars, int32_t srcLength,
	3933	        int32_t start, int32_t _length) const
	3934	{
	3935	    return indexOf(srcChars, 0, srcLength, start, _length);
	3936	}
	3937
	3938	// UChar search within a range; thin wrapper over the UChar overload of doIndexOf().
	3939	inline int32_t UnicodeString::indexOf(UChar c, int32_t start, int32_t _length) const
	3940	{
	3941	    return doIndexOf(c, start, _length);
	3942	}
	3943
	3944	// UChar32 search within a range; thin wrapper over the UChar32 overload of doIndexOf().
	3945	inline int32_t UnicodeString::indexOf(UChar32 c, int32_t start, int32_t _length) const
	3946	{
	3947	    return doIndexOf(c, start, _length);
	3948	}
	3949
	3950	inline int32_t UnicodeString::indexOf(UChar c) const {
	3951	    return doIndexOf(c, 0, length());  // search the whole string
	3952	}
	3953
	3954	inline int32_t UnicodeString::indexOf(UChar32 c) const {
	3955	    return indexOf(c, 0, length());  // whole string, via the ranged UChar32 overload
	3956	}
	3957
	3958	// Searches from start to the end; start is pinned first, so length() - start is not negative.
	3959	inline int32_t UnicodeString::indexOf(UChar c, int32_t start) const
	3960	{
	3961	    pinIndex(start);
	3962	    return doIndexOf(c, start, length() - start);
	3963	}
	3964
	3965	// Searches from start to the end via the ranged UChar32 overload; start is pinned first.
	3966	inline int32_t UnicodeString::indexOf(UChar32 c, int32_t start) const
	3967	{
	3968	    pinIndex(start);
	3969	    return indexOf(c, start, length() - start);
	3970	}
	3971
	3972	// Passes srcChars from index 0 plus the given range to the 5-argument UChar* overload.
	3973	inline int32_t UnicodeString::lastIndexOf(const UChar *srcChars, int32_t srcLength,
	3974	        int32_t start, int32_t _length) const
	3975	{
	3976	    return lastIndexOf(srcChars, 0, srcLength, start, _length);
	3977	}
	3978
	3979	// Searches from start to the end; start is pinned first, so length() - start is not negative.
	3980	inline int32_t UnicodeString::lastIndexOf(const UChar *srcChars, int32_t srcLength,
	3981	        int32_t start) const
	3982	{
	3983	    pinIndex(start);
	3984	    return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
	3985	}
	3986
	3987	// Delegates to the UChar* overload of lastIndexOf() using srcText's array.
	3988	// Returns -1 if srcText is bogus or if the source range is empty after pinning.
	3989	inline int32_t UnicodeString::lastIndexOf(const UnicodeString& srcText,
	3990	        int32_t srcStart, int32_t srcLength,
	3991	        int32_t start, int32_t _length) const
	3992	{
	3993	    if(srcText.isBogus()) {
	3994	        return -1;
	3995	    }
	3996	    srcText.pinIndices(srcStart, srcLength);
	3997	    if(srcLength <= 0) {
	3998	        return -1;
	3999	    }
	4000	    return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
	4001	}
	4002
	4003	inline int32_t
	4004	UnicodeString::lastIndexOf(const UnicodeString& text,
	4005	int32_t start,
	4006	int32_t _length) const
	4007	{ return lastIndexOf(text, 0, text.length(), start, _length); }
	4008
	4009	inline int32_t
	4010	UnicodeString::lastIndexOf(const UnicodeString& text,
	4011	int32_t start) const {
	4012	pinIndex(start);
	4013	return lastIndexOf(text, 0, text.length(), start, length() - start);
	4014	}
	4015
	4016	inline int32_t
	4017	UnicodeString::lastIndexOf(const UnicodeString& text) const
	4018	{ return lastIndexOf(text, 0, text.length(), 0, length()); }
	4019
	4020	inline int32_t
	4021	UnicodeString::lastIndexOf(UChar c,
	4022	int32_t start,
	4023	int32_t _length) const
	4024	{ return doLastIndexOf(c, start, _length); }
	4025
	4026	inline int32_t
	4027	UnicodeString::lastIndexOf(UChar32 c,
	4028	int32_t start,
	4029	int32_t _length) const {
	4030	return doLastIndexOf(c, start, _length);
	4031	}
	4032
	4033	inline int32_t
	4034	UnicodeString::lastIndexOf(UChar c) const
	4035	{ return doLastIndexOf(c, 0, length()); }
	4036
	4037	inline int32_t
	4038	UnicodeString::lastIndexOf(UChar32 c) const {
	4039	return lastIndexOf(c, 0, length());
	4040	}
	4041
	4042	inline int32_t
	4043	UnicodeString::lastIndexOf(UChar c,
	4044	int32_t start) const {
	4045	pinIndex(start);
	4046	return doLastIndexOf(c, start, length() - start);
	4047	}
	4048
	4049	inline int32_t
	4050	UnicodeString::lastIndexOf(UChar32 c,
	4051	int32_t start) const {
	4052	pinIndex(start);
	4053	return lastIndexOf(c, start, length() - start);
	4054	}
	4055
	4056	inline UBool
	4057	UnicodeString::startsWith(const UnicodeString& text) const
	4058	{ return compare(0, text.length(), text, 0, text.length()) == 0; }
	4059
	4060	inline UBool
	4061	UnicodeString::startsWith(const UnicodeString& srcText,
	4062	int32_t srcStart,
	4063	int32_t srcLength) const
	4064	{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
	4065
	4066	inline UBool
	4067	UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
	4068	if(srcLength < 0) {
	4069	srcLength = u_strlen(srcChars);
	4070	}
	4071	return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
	4072	}
	4073
	4074	inline UBool
	4075	UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
	4076	if(srcLength < 0) {
	4077	srcLength = u_strlen(srcChars);
	4078	}
	4079	return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
	4080	}
	4081
	4082	inline UBool
	4083	UnicodeString::endsWith(const UnicodeString& text) const
	4084	{ return doCompare(length() - text.length(), text.length(),
	4085	text, 0, text.length()) == 0; }
	4086
	4087	inline UBool
	4088	UnicodeString::endsWith(const UnicodeString& srcText,
	4089	int32_t srcStart,
	4090	int32_t srcLength) const {
	4091	srcText.pinIndices(srcStart, srcLength);
	4092	return doCompare(length() - srcLength, srcLength,
	4093	srcText, srcStart, srcLength) == 0;
	4094	}
	4095
	4096	inline UBool
	4097	UnicodeString::endsWith(const UChar *srcChars,
	4098	int32_t srcLength) const {
	4099	if(srcLength < 0) {
	4100	srcLength = u_strlen(srcChars);
	4101	}
	4102	return doCompare(length() - srcLength, srcLength,
	4103	srcChars, 0, srcLength) == 0;
	4104	}
	4105
	4106	inline UBool
	4107	UnicodeString::endsWith(const UChar *srcChars,
	4108	int32_t srcStart,
	4109	int32_t srcLength) const {
	4110	if(srcLength < 0) {
	4111	srcLength = u_strlen(srcChars + srcStart);
	4112	}
	4113	return doCompare(length() - srcLength, srcLength,
	4114	srcChars, srcStart, srcLength) == 0;
	4115	}
	4116
	4117	//========================================
	4118	// replace
	4119	//========================================
	4120	inline UnicodeString&
	4121	UnicodeString::replace(int32_t start,
	4122	int32_t _length,
	4123	const UnicodeString& srcText)
	4124	{ return doReplace(start, _length, srcText, 0, srcText.length()); }
	4125
	4126	inline UnicodeString&
	4127	UnicodeString::replace(int32_t start,
	4128	int32_t _length,
	4129	const UnicodeString& srcText,
	4130	int32_t srcStart,
	4131	int32_t srcLength)
	4132	{ return doReplace(start, _length, srcText, srcStart, srcLength); }
	4133
	4134	inline UnicodeString&
	4135	UnicodeString::replace(int32_t start,
	4136	int32_t _length,
	4137	const UChar *srcChars,
	4138	int32_t srcLength)
	4139	{ return doReplace(start, _length, srcChars, 0, srcLength); }
	4140
	4141	inline UnicodeString&
	4142	UnicodeString::replace(int32_t start,
	4143	int32_t _length,
	4144	const UChar *srcChars,
	4145	int32_t srcStart,
	4146	int32_t srcLength)
	4147	{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
	4148
	4149	inline UnicodeString&
	4150	UnicodeString::replace(int32_t start,
	4151	int32_t _length,
	4152	UChar srcChar)
	4153	{ return doReplace(start, _length, &srcChar, 0, 1); }
	4154
	4155	inline UnicodeString&
	4156	UnicodeString::replaceBetween(int32_t start,
	4157	int32_t limit,
	4158	const UnicodeString& srcText)
	4159	{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
	4160
	4161	inline UnicodeString&
	4162	UnicodeString::replaceBetween(int32_t start,
	4163	int32_t limit,
	4164	const UnicodeString& srcText,
	4165	int32_t srcStart,
	4166	int32_t srcLimit)
	4167	{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
	4168
	4169	inline UnicodeString&
	4170	UnicodeString::findAndReplace(const UnicodeString& oldText,
	4171	const UnicodeString& newText)
	4172	{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
	4173	newText, 0, newText.length()); }
	4174
	4175	inline UnicodeString&
	4176	UnicodeString::findAndReplace(int32_t start,
	4177	int32_t _length,
	4178	const UnicodeString& oldText,
	4179	const UnicodeString& newText)
	4180	{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
	4181	newText, 0, newText.length()); }
	4182
	4183	// ============================
	4184	// extract
	4185	// ============================
	4186	inline void
	4187	UnicodeString::doExtract(int32_t start,
	4188	int32_t _length,
	4189	UnicodeString& target) const
	4190	{ target.replace(0, target.length(), *this, start, _length); }
	4191
	4192	inline void
	4193	UnicodeString::extract(int32_t start,
	4194	int32_t _length,
	4195	UChar *target,
	4196	int32_t targetStart) const
	4197	{ doExtract(start, _length, target, targetStart); }
	4198
	4199	inline void
	4200	UnicodeString::extract(int32_t start,
	4201	int32_t _length,
	4202	UnicodeString& target) const
	4203	{ doExtract(start, _length, target); }
	4204
	4205	#if !UCONFIG_NO_CONVERSION
	4206
	4207	inline int32_t
	4208	UnicodeString::extract(int32_t start,
	4209	int32_t _length,
	4210	char *dst,
	4211	const char *codepage) const
	4212
	4213	{
	4214	// This dstSize value will be checked explicitly
	4215	return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
	4216	}
	4217
	4218	#endif
	4219
	4220	inline void
	4221	UnicodeString::extractBetween(int32_t start,
	4222	int32_t limit,
	4223	UChar *dst,
	4224	int32_t dstStart) const {
	4225	pinIndex(start);
	4226	pinIndex(limit);
	4227	doExtract(start, limit - start, dst, dstStart);
	4228	}
	4229
	4230	inline UnicodeString
	4231	UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
	4232	return tempSubString(start, limit - start);
	4233	}
	4234
	4235	inline UChar
	4236	UnicodeString::doCharAt(int32_t offset) const
	4237	{
	4238	if((uint32_t)offset < (uint32_t)length()) {
	4239	return getArrayStart()[offset];
	4240	} else {
	4241	return kInvalidUChar;
	4242	}
	4243	}
	4244
	4245	inline UChar
	4246	UnicodeString::charAt(int32_t offset) const
	4247	{ return doCharAt(offset); }
	4248
	4249	inline UChar
	4250	UnicodeString::operator[] (int32_t offset) const
	4251	{ return doCharAt(offset); }
	4252
	4253	inline UBool
	4254	UnicodeString::isEmpty() const {
	4255	return fShortLength == 0;
	4256	}
	4257
	4258	//========================================
	4259	// Write implementation methods
	4260	//========================================
	4261	inline void
	4262	UnicodeString::setLength(int32_t len) {
	4263	if(len <= 127) {
	4264	fShortLength = (int8_t)len;
	4265	} else {
	4266	fShortLength = (int8_t)-1;
	4267	fUnion.fFields.fLength = len;
	4268	}
	4269	}
	4270
	4271	inline void
	4272	UnicodeString::setToEmpty() {
	4273	fShortLength = 0;
	4274	fFlags = kShortString;
	4275	}
	4276
	4277	inline void
	4278	UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
	4279	setLength(len);
	4280	fUnion.fFields.fArray = array;
	4281	fUnion.fFields.fCapacity = capacity;
	4282	}
	4283
	4284	inline const UChar *
	4285	UnicodeString::getTerminatedBuffer() {
	4286	if(!isWritable()) {
	4287	return 0;
	4288	} else {
	4289	UChar *array = getArrayStart();
	4290	int32_t len = length();
	4291	if(len < getCapacity() && ((fFlags&kRefCounted) == 0 \|\| refCount() == 1)) {
	4292	/*
	4293	* kRefCounted: Do not write the NUL if the buffer is shared.
	4294	* That is mostly safe, except when the length of one copy was modified
	4295	* without copy-on-write, e.g., via truncate(newLength) or remove(void).
	4296	* Then the NUL would be written into the middle of another copy's string.
	4297	*/
	4298	if(!(fFlags&kBufferIsReadonly)) {
	4299	/*
	4300	* We must not write to a readonly buffer, but it is known to be
	4301	* NUL-terminated if len<capacity.
	4302	* A shared, allocated buffer (refCount()>1) must not have its contents
	4303	* modified, but the NUL at [len] is beyond the string contents,
	4304	* and multiple string objects and threads writing the same NUL into the
	4305	* same location is harmless.
	4306	* In all other cases, the buffer is fully writable and it is anyway safe
	4307	* to write the NUL.
	4308	*
	4309	* Note: An earlier version of this code tested whether there is a NUL
	4310	* at [len] already, but, while safe, it generated lots of warnings from
	4311	* tools like valgrind and Purify.
	4312	*/
	4313	array[len] = 0;
	4314	}
	4315	return array;
	4316	} else if(cloneArrayIfNeeded(len+1)) {
	4317	array = getArrayStart();
	4318	array[len] = 0;
	4319	return array;
	4320	} else {
	4321	return 0;
	4322	}
	4323	}
	4324	}
	4325
	4326	inline UnicodeString&
	4327	UnicodeString::operator= (UChar ch)
	4328	{ return doReplace(0, length(), &ch, 0, 1); }
	4329
	4330	inline UnicodeString&
	4331	UnicodeString::operator= (UChar32 ch)
	4332	{ return replace(0, length(), ch); }
	4333
	4334	inline UnicodeString&
	4335	UnicodeString::setTo(const UnicodeString& srcText,
	4336	int32_t srcStart,
	4337	int32_t srcLength)
	4338	{
	4339	unBogus();
	4340	return doReplace(0, length(), srcText, srcStart, srcLength);
	4341	}
	4342
	4343	inline UnicodeString&
	4344	UnicodeString::setTo(const UnicodeString& srcText,
	4345	int32_t srcStart)
	4346	{
	4347	unBogus();
	4348	srcText.pinIndex(srcStart);
	4349	return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
	4350	}
	4351
	4352	inline UnicodeString&
	4353	UnicodeString::setTo(const UnicodeString& srcText)
	4354	{
	4355	return copyFrom(srcText);
	4356	}
	4357
	4358	inline UnicodeString&
	4359	UnicodeString::setTo(const UChar *srcChars,
	4360	int32_t srcLength)
	4361	{
	4362	unBogus();
	4363	return doReplace(0, length(), srcChars, 0, srcLength);
	4364	}
	4365
	4366	inline UnicodeString&
	4367	UnicodeString::setTo(UChar srcChar)
	4368	{
	4369	unBogus();
	4370	return doReplace(0, length(), &srcChar, 0, 1);
	4371	}
	4372
	4373	inline UnicodeString&
	4374	UnicodeString::setTo(UChar32 srcChar)
	4375	{
	4376	unBogus();
	4377	return replace(0, length(), srcChar);
	4378	}
	4379
	4380	inline UnicodeString&
	4381	UnicodeString::append(const UnicodeString& srcText,
	4382	int32_t srcStart,
	4383	int32_t srcLength)
	4384	{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
	4385
	4386	inline UnicodeString&
	4387	UnicodeString::append(const UnicodeString& srcText)
	4388	{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
	4389
	4390	inline UnicodeString&
	4391	UnicodeString::append(const UChar *srcChars,
	4392	int32_t srcStart,
	4393	int32_t srcLength)
	4394	{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
	4395
	4396	inline UnicodeString&
	4397	UnicodeString::append(const UChar *srcChars,
	4398	int32_t srcLength)
	4399	{ return doReplace(length(), 0, srcChars, 0, srcLength); }
	4400
	4401	inline UnicodeString&
	4402	UnicodeString::append(UChar srcChar)
	4403	{ return doReplace(length(), 0, &srcChar, 0, 1); }
	4404
	4405	inline UnicodeString&
	4406	UnicodeString::operator+= (UChar ch)
	4407	{ return doReplace(length(), 0, &ch, 0, 1); }
	4408
	4409	inline UnicodeString&
	4410	UnicodeString::operator+= (UChar32 ch) {
	4411	return append(ch);
	4412	}
	4413
	4414	inline UnicodeString&
	4415	UnicodeString::operator+= (const UnicodeString& srcText)
	4416	{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
	4417
	4418	inline UnicodeString&
	4419	UnicodeString::insert(int32_t start,
	4420	const UnicodeString& srcText,
	4421	int32_t srcStart,
	4422	int32_t srcLength)
	4423	{ return doReplace(start, 0, srcText, srcStart, srcLength); }
	4424
	4425	inline UnicodeString&
	4426	UnicodeString::insert(int32_t start,
	4427	const UnicodeString& srcText)
	4428	{ return doReplace(start, 0, srcText, 0, srcText.length()); }
	4429
	4430	inline UnicodeString&
	4431	UnicodeString::insert(int32_t start,
	4432	const UChar *srcChars,
	4433	int32_t srcStart,
	4434	int32_t srcLength)
	4435	{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
	4436
	4437	inline UnicodeString&
	4438	UnicodeString::insert(int32_t start,
	4439	const UChar *srcChars,
	4440	int32_t srcLength)
	4441	{ return doReplace(start, 0, srcChars, 0, srcLength); }
	4442
	4443	inline UnicodeString&
	4444	UnicodeString::insert(int32_t start,
	4445	UChar srcChar)
	4446	{ return doReplace(start, 0, &srcChar, 0, 1); }
	4447
	4448	inline UnicodeString&
	4449	UnicodeString::insert(int32_t start,
	4450	UChar32 srcChar)
	4451	{ return replace(start, 0, srcChar); }
	4452
	4453
	4454	inline UnicodeString&
	4455	UnicodeString::remove()
	4456	{
	4457	// remove() of a bogus string makes the string empty and non-bogus
	4458	// we also un-alias a read-only alias to deal with NUL-termination
	4459	// issues with getTerminatedBuffer()
	4460	if(fFlags & (kIsBogus\|kBufferIsReadonly)) {
	4461	setToEmpty();
	4462	} else {
	4463	fShortLength = 0;
	4464	}
	4465	return *this;
	4466	}
	4467
	4468	inline UnicodeString&
	4469	UnicodeString::remove(int32_t start,
	4470	int32_t _length)
	4471	{
	4472	if(start <= 0 && _length == INT32_MAX) {
	4473	// remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
	4474	return remove();
	4475	}
	4476	return doReplace(start, _length, NULL, 0, 0);
	4477	}
	4478
	4479	inline UnicodeString&
	4480	UnicodeString::removeBetween(int32_t start,
	4481	int32_t limit)
	4482	{ return doReplace(start, limit - start, NULL, 0, 0); }
	4483
	4484	inline UnicodeString &
	4485	UnicodeString::retainBetween(int32_t start, int32_t limit) {
	4486	truncate(limit);
	4487	return doReplace(0, start, NULL, 0, 0);
	4488	}
	4489
	4490	inline UBool
	4491	UnicodeString::truncate(int32_t targetLength)
	4492	{
	4493	if(isBogus() && targetLength == 0) {
	4494	// truncate(0) of a bogus string makes the string empty and non-bogus
	4495	unBogus();
	4496	return FALSE;
	4497	} else if((uint32_t)targetLength < (uint32_t)length()) {
	4498	setLength(targetLength);
	4499	if(fFlags&kBufferIsReadonly) {
	4500	fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more
	4501	}
	4502	return TRUE;
	4503	} else {
	4504	return FALSE;
	4505	}
	4506	}
	4507
	4508	inline UnicodeString&
	4509	UnicodeString::reverse()
	4510	{ return doReverse(0, length()); }
	4511
	4512	inline UnicodeString&
	4513	UnicodeString::reverse(int32_t start,
	4514	int32_t _length)
	4515	{ return doReverse(start, _length); }
	4516
	4517	U_NAMESPACE_END
	4518
	4519	#endif