git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/common/unicode/unistr.h

... / ...

Commit	Line	Data
	1	/*
	2	**********************************************************************
	3	* Copyright (C) 1998-2011, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	*
	7	* File unistr.h
	8	*
	9	* Modification History:
	10	*
	11	* Date Name Description
	12	* 09/25/98 stephen Creation.
	13	* 11/11/98 stephen Changed per 11/9 code review.
	14	* 04/20/99 stephen Overhauled per 4/16 code review.
	15	* 11/18/99 aliu Made to inherit from Replaceable. Added method
	16	* handleReplaceBetween(); other methods unchanged.
	17	* 06/25/01 grhoten Remove dependency on iostream.
	18	******************************************************************************
	19	*/
	20
	21	#ifndef UNISTR_H
	22	#define UNISTR_H
	23
	24	/**
	25	* \file
	26	* \brief C++ API: Unicode String
	27	*/
	28
	29	#include "unicode/utypes.h"
	30	#include "unicode/rep.h"
	31	#include "unicode/std_string.h"
	32	#include "unicode/stringpiece.h"
	33	#include "unicode/bytestream.h"
	34	#include "unicode/ucasemap.h"
	35
	36	struct UConverter; // unicode/ucnv.h
	37	class StringThreadTest;
	38
	39	#ifndef U_COMPARE_CODE_POINT_ORDER
	40	/* see also ustring.h and unorm.h */
	41	/**
	42	* Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
	43	* Compare strings in code point order instead of code unit order.
	44	* @stable ICU 2.2
	45	*/
	46	#define U_COMPARE_CODE_POINT_ORDER 0x8000
	47	#endif
	48
	49	#ifndef USTRING_H
	50	/**
	51	* \ingroup ustring_ustrlen
	52	*/
	53	U_STABLE int32_t U_EXPORT2
	54	u_strlen(const UChar *s);
	55	#endif
	56
	57	#ifndef U_STRING_CASE_MAPPER_DEFINED
	58	#define U_STRING_CASE_MAPPER_DEFINED
	59
	60	/**
	61	* Internal string case mapping function type.
	62	* @internal
	63	*/
	64	typedef int32_t U_CALLCONV
	65	UStringCaseMapper(const UCaseMap *csm,
	66	UChar *dest, int32_t destCapacity,
	67	const UChar *src, int32_t srcLength,
	68	UErrorCode *pErrorCode);
	69
	70	#endif
	71
	72	U_NAMESPACE_BEGIN
	73
	74	class BreakIterator; // unicode/brkiter.h
	75	class Locale; // unicode/locid.h
	76	class StringCharacterIterator;
	77	class UnicodeStringAppendable; // unicode/appendable.h
	78
	79	/* The <iostream> include has been moved to unicode/ustream.h */
	80
	81	/**
	82	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
	83	* which constructs a Unicode string from an invariant-character char * string.
	84	* About invariant characters see utypes.h.
	85	* This constructor has no runtime dependency on conversion code and is
	86	* therefore recommended over ones taking a charset name string
	87	* (where the empty string "" indicates invariant-character conversion).
	88	*
	89	* @stable ICU 3.2
	90	*/
	91	#define US_INV icu::UnicodeString::kInvariant
	92
	93	/**
	94	* Unicode String literals in C++.
	95	* Dependent on the platform properties, different UnicodeString
	96	* constructors should be used to create a UnicodeString object from
	97	* a string literal.
	98	* The macros are defined for maximum performance.
	99	* They work only for strings that contain "invariant characters", i.e.,
	100	* only latin letters, digits, and some punctuation.
	101	* See utypes.h for details.
	102	*
	103	* The string parameter must be a C string literal.
	104	* The length of the string, not including the terminating
	105	* <code>NUL</code>, must be specified as a constant.
	106	* The U_STRING_DECL macro should be invoked exactly once for one
	107	* such string variable before it is used.
	108	* @stable ICU 2.0
	109	*/
	110	#if defined(U_DECLARE_UTF16)
	111	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
	112	#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
	113	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
	114	#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
	115	# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
	116	#else // fallback: go through the invariant-character (US_INV) constructor
	117	# define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
	118	#endif // UNICODE_STRING
	119
	120	/**
	121	* Unicode String literals in C++.
	122	* Dependent on the platform properties, different UnicodeString
	123	* constructors should be used to create a UnicodeString object from
	124	* a string literal.
	125	* The macros are defined for improved performance.
	126	* They work only for strings that contain "invariant characters", i.e.,
	127	* only latin letters, digits, and some punctuation.
	128	* See utypes.h for details.
	129	*
	130	* The string parameter must be a C string literal.
	131	* @stable ICU 2.0
	132	*/
	133	#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
	134
	135	/**
	136	* \def UNISTR_FROM_CHAR_EXPLICIT
	137	* This can be defined to be empty or "explicit".
	138	* If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
	139	* constructors are marked as explicit, preventing their inadvertent use.
	140	* @draft ICU 49
	141	*/
	142	#ifndef UNISTR_FROM_CHAR_EXPLICIT
	143	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
	144	// Auto-"explicit" in ICU library code.
	145	# define UNISTR_FROM_CHAR_EXPLICIT explicit
	146	# else
	147	// Empty by default for source code compatibility.
	148	# define UNISTR_FROM_CHAR_EXPLICIT
	149	# endif
	150	#endif // UNISTR_FROM_CHAR_EXPLICIT
	151
	152	/**
	153	* \def UNISTR_FROM_STRING_EXPLICIT
	154	* This can be defined to be empty or "explicit".
	155	* If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
	156	* constructors are marked as explicit, preventing their inadvertent use.
	157	*
	158	* In particular, this helps prevent accidentally depending on ICU conversion code
	159	* by passing a string literal into an API with a const UnicodeString & parameter.
	160	* @draft ICU 49
	161	*/
	162	#ifndef UNISTR_FROM_STRING_EXPLICIT
	163	# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
	164	// Auto-"explicit" in ICU library code.
	165	# define UNISTR_FROM_STRING_EXPLICIT explicit
	166	# else
	167	// Empty by default for source code compatibility.
	168	# define UNISTR_FROM_STRING_EXPLICIT
	169	# endif
	170	#endif // UNISTR_FROM_STRING_EXPLICIT
	171
	172	/**
	173	* UnicodeString is a string class that stores Unicode characters directly and provides
	174	* functionality similar to that of the Java String and StringBuffer classes.
	175	* It is a concrete implementation of the abstract class Replaceable (for transliteration).
	176	*
	177	* The UnicodeString class is not suitable for subclassing.
	178	*
	179	* <p>For an overview of Unicode strings in C and C++ see the
	180	* <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
	181	*
	182	* <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
	183	* A Unicode character may be stored with either one code unit
	184	* (the most common case) or with a matched pair of special code units
	185	* ("surrogates"). The data type for code units is UChar.
	186	* For single-character handling, a Unicode character code <em>point</em> is a value
	187	* in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
	188	*
	189	* <p>Indexes and offsets into and lengths of strings always count code units, not code points.
	190	* This is the same as with multi-byte char* strings in traditional string handling.
	191	* Operations on partial strings typically do not test for code point boundaries.
	192	* If necessary, the user needs to take care of such boundaries by testing for the code unit
	193	* values or by using functions like
	194	* UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
	195	* (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
	196	*
	197	* UnicodeString methods are more lenient with regard to input parameter values
	198	* than other ICU APIs. In particular:
	199	* - If indexes are out of bounds for a UnicodeString object
	200	* (<0 or >length()) then they are "pinned" to the nearest boundary.
	201	* - If primitive string pointer values (e.g., const UChar * or char *)
	202	* for input strings are NULL, then those input string parameters are treated
	203	* as if they pointed to an empty string.
	204	* However, this is <em>not</em> the case for char * parameters for charset names
	205	* or other IDs.
	206	* - Most UnicodeString methods do not take a UErrorCode parameter because
	207	* there are usually very few opportunities for failure other than a shortage
	208	* of memory, error codes in low-level C++ string methods would be inconvenient,
	209	* and the error code as the last parameter (ICU convention) would prevent
	210	* the use of default parameter values.
	211	* Instead, such methods set the UnicodeString into a "bogus" state
	212	* (see isBogus()) if an error occurs.
	213	*
	214	* In string comparisons, two UnicodeString objects that are both "bogus"
	215	* compare equal (to be transitive and prevent endless loops in sorting),
	216	* and a "bogus" string compares less than any non-"bogus" one.
	217	*
	218	* Const UnicodeString methods are thread-safe. Multiple threads can use
	219	* const methods on the same UnicodeString object simultaneously,
	220	* but non-const methods must not be called concurrently (in multiple threads)
	221	* with any other (const or non-const) methods.
	222	*
	223	* Similarly, const UnicodeString & parameters are thread-safe.
	224	* One object may be passed in as such a parameter concurrently in multiple threads.
	225	* This includes the const UnicodeString & parameters for
	226	* copy construction, assignment, and cloning.
	227	*
	228	* <p>UnicodeString uses several storage methods.
	229	* String contents can be stored inside the UnicodeString object itself,
	230	* in an allocated and shared buffer, or in an outside buffer that is "aliased".
	231	* Most of this is done transparently, but careful aliasing in particular provides
	232	* significant performance improvements.
	233	* Also, the internal buffer is accessible via special functions.
	234	* For details see the
	235	* <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
	236	*
	237	* @see utf.h
	238	* @see CharacterIterator
	239	* @stable ICU 2.0
	240	*/
	241	class U_COMMON_API UnicodeString : public Replaceable
	242	{
	243	public:
	244
	245	/**
	246	* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
	247	* which constructs a Unicode string from an invariant-character char * string.
	248	* Use the macro US_INV instead of the full qualification for this value.
	249	*
	250	* @see US_INV
	251	* @stable ICU 3.2
	252	*/
	253	enum EInvariant {
	254	/**
	255	* @see EInvariant
	256	* @stable ICU 3.2
	257	*/
	258	kInvariant
	259	};
	260
	261	//========================================
	262	// Read-only operations
	263	//========================================
	264
	265	/* Comparison - bitwise only - for international comparison use collation */
	266
	267	/**
	268	* Equality operator. Performs only bitwise comparison.
	269	* @param text The UnicodeString to compare to this one.
	270	* @return TRUE if <TT>text</TT> contains the same characters as this one,
	271	* FALSE otherwise.
	272	* @stable ICU 2.0
	273	*/
	274	inline UBool operator== (const UnicodeString& text) const;
	275
	276	/**
	277	* Inequality operator. Performs only bitwise comparison.
	278	* @param text The UnicodeString to compare to this one.
	279	* @return FALSE if <TT>text</TT> contains the same characters as this one,
	280	* TRUE otherwise.
	281	* @stable ICU 2.0
	282	*/
	283	inline UBool operator!= (const UnicodeString& text) const;
	284
	285	/**
	286	* Greater than operator. Performs only bitwise comparison.
	287	* @param text The UnicodeString to compare to this one.
	288	* @return TRUE if the characters in this are bitwise
	289	* greater than the characters in <code>text</code>, FALSE otherwise
	290	* @stable ICU 2.0
	291	*/
	292	inline UBool operator> (const UnicodeString& text) const;
	293
	294	/**
	295	* Less than operator. Performs only bitwise comparison.
	296	* @param text The UnicodeString to compare to this one.
	297	* @return TRUE if the characters in this are bitwise
	298	* less than the characters in <code>text</code>, FALSE otherwise
	299	* @stable ICU 2.0
	300	*/
	301	inline UBool operator< (const UnicodeString& text) const;
	302
	303	/**
	304	* Greater than or equal operator. Performs only bitwise comparison.
	305	* @param text The UnicodeString to compare to this one.
	306	* @return TRUE if the characters in this are bitwise
	307	* greater than or equal to the characters in <code>text</code>, FALSE otherwise
	308	* @stable ICU 2.0
	309	*/
	310	inline UBool operator>= (const UnicodeString& text) const;
	311
	312	/**
	313	* Less than or equal operator. Performs only bitwise comparison.
	314	* @param text The UnicodeString to compare to this one.
	315	* @return TRUE if the characters in this are bitwise
	316	* less than or equal to the characters in <code>text</code>, FALSE otherwise
	317	* @stable ICU 2.0
	318	*/
	319	inline UBool operator<= (const UnicodeString& text) const;
	320
	321	/**
	322	* Compare the characters bitwise in this UnicodeString to
	323	* the characters in <code>text</code>.
	324	* @param text The UnicodeString to compare to this one.
	325	* @return The result of bitwise character comparison: 0 if this
	326	* contains the same characters as <code>text</code>, -1 if the characters in
	327	* this are bitwise less than the characters in <code>text</code>, +1 if the
	328	* characters in this are bitwise greater than the characters
	329	* in <code>text</code>.
	330	* @stable ICU 2.0
	331	*/
	332	inline int8_t compare(const UnicodeString& text) const;
	333
	334	/**
	335	* Compare the characters bitwise in the range
	336	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
	337	* in <TT>text</TT>
	338	* @param start the offset at which the compare operation begins
	339	* @param length the number of characters of text to compare.
	340	* @param text the other text to be compared against this string.
	341	* @return The result of bitwise character comparison: 0 if this
	342	* contains the same characters as <code>text</code>, -1 if the characters in
	343	* this are bitwise less than the characters in <code>text</code>, +1 if the
	344	* characters in this are bitwise greater than the characters
	345	* in <code>text</code>.
	346	* @stable ICU 2.0
	347	*/
	348	inline int8_t compare(int32_t start,
	349	int32_t length,
	350	const UnicodeString& text) const;
	351
	352	/**
	353	* Compare the characters bitwise in the range
	354	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
	355	* in <TT>srcText</TT> in the range
	356	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	357	* @param start the offset at which the compare operation begins
	358	* @param length the number of characters in this to compare.
	359	* @param srcText the text to be compared
	360	* @param srcStart the offset into <TT>srcText</TT> to start comparison
	361	* @param srcLength the number of characters in <TT>srcText</TT> to compare
	362	* @return The result of bitwise character comparison: 0 if this
	363	* contains the same characters as <code>srcText</code>, -1 if the characters in
	364	* this are bitwise less than the characters in <code>srcText</code>, +1 if the
	365	* characters in this are bitwise greater than the characters
	366	* in <code>srcText</code>.
	367	* @stable ICU 2.0
	368	*/
	369	inline int8_t compare(int32_t start,
	370	int32_t length,
	371	const UnicodeString& srcText,
	372	int32_t srcStart,
	373	int32_t srcLength) const;
	374
	375	/**
	376	* Compare the characters bitwise in this UnicodeString with the first
	377	* <TT>srcLength</TT> characters in <TT>srcChars</TT>.
	378	* @param srcChars The characters to compare to this UnicodeString.
	379	* @param srcLength the number of characters in <TT>srcChars</TT> to compare
	380	* @return The result of bitwise character comparison: 0 if this
	381	* contains the same characters as <code>srcChars</code>, -1 if the characters in
	382	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
	383	* characters in this are bitwise greater than the characters
	384	* in <code>srcChars</code>.
	385	* @stable ICU 2.0
	386	*/
	387	inline int8_t compare(const UChar *srcChars,
	388	int32_t srcLength) const;
	389
	390	/**
	391	* Compare the characters bitwise in the range
	392	* [<TT>start</TT>, <TT>start + length</TT>) with the first
	393	* <TT>length</TT> characters in <TT>srcChars</TT>
	394	* @param start the offset at which the compare operation begins
	395	* @param length the number of characters to compare.
	396	* @param srcChars the characters to be compared
	397	* @return The result of bitwise character comparison: 0 if this
	398	* contains the same characters as <code>srcChars</code>, -1 if the characters in
	399	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
	400	* characters in this are bitwise greater than the characters
	401	* in <code>srcChars</code>.
	402	* @stable ICU 2.0
	403	*/
	404	inline int8_t compare(int32_t start,
	405	int32_t length,
	406	const UChar *srcChars) const;
	407
	408	/**
	409	* Compare the characters bitwise in the range
	410	* [<TT>start</TT>, <TT>start + length</TT>) with the characters
	411	* in <TT>srcChars</TT> in the range
	412	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	413	* @param start the offset at which the compare operation begins
	414	* @param length the number of characters in this to compare
	415	* @param srcChars the characters to be compared
	416	* @param srcStart the offset into <TT>srcChars</TT> to start comparison
	417	* @param srcLength the number of characters in <TT>srcChars</TT> to compare
	418	* @return The result of bitwise character comparison: 0 if this
	419	* contains the same characters as <code>srcChars</code>, -1 if the characters in
	420	* this are bitwise less than the characters in <code>srcChars</code>, +1 if the
	421	* characters in this are bitwise greater than the characters
	422	* in <code>srcChars</code>.
	423	* @stable ICU 2.0
	424	*/
	425	inline int8_t compare(int32_t start,
	426	int32_t length,
	427	const UChar *srcChars,
	428	int32_t srcStart,
	429	int32_t srcLength) const;
	430
	431	/**
	432	* Compare the characters bitwise in the range
	433	* [<TT>start</TT>, <TT>limit</TT>) with the characters
	434	* in <TT>srcText</TT> in the range
	435	* [<TT>srcStart</TT>, <TT>srcLimit</TT>).
	436	* @param start the offset at which the compare operation begins
	437	* @param limit the offset immediately following the compare operation
	438	* @param srcText the text to be compared
	439	* @param srcStart the offset into <TT>srcText</TT> to start comparison
	440	* @param srcLimit the offset into <TT>srcText</TT> to limit comparison
	441	* @return The result of bitwise character comparison: 0 if this
	442	* contains the same characters as <code>srcText</code>, -1 if the characters in
	443	* this are bitwise less than the characters in <code>srcText</code>, +1 if the
	444	* characters in this are bitwise greater than the characters
	445	* in <code>srcText</code>.
	446	* @stable ICU 2.0
	447	*/
	448	inline int8_t compareBetween(int32_t start,
	449	int32_t limit,
	450	const UnicodeString& srcText,
	451	int32_t srcStart,
	452	int32_t srcLimit) const;
	453
	454	/**
	455	* Compare two Unicode strings in code point order.
	456	* The result may be different from the results of compare(), operator<, etc.
	457	* if supplementary characters are present:
	458	*
	459	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	460	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	461	* which means that they compare as less than some other BMP characters like U+feff.
	462	* This function compares Unicode strings in code point order.
	463	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	464	*
	465	* @param text Another string to compare this one to.
	466	* @return a negative/zero/positive integer corresponding to whether
	467	* this string is less than/equal to/greater than the second one
	468	* in code point order
	469	* @stable ICU 2.0
	470	*/
	471	inline int8_t compareCodePointOrder(const UnicodeString& text) const;
	472
	473	/**
	474	* Compare two Unicode strings in code point order.
	475	* The result may be different from the results of compare(), operator<, etc.
	476	* if supplementary characters are present:
	477	*
	478	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	479	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	480	* which means that they compare as less than some other BMP characters like U+feff.
	481	* This function compares Unicode strings in code point order.
	482	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	483	*
	484	* @param start The start offset in this string at which the compare operation begins.
	485	* @param length The number of code units from this string to compare.
	486	* @param srcText Another string to compare this one to.
	487	* @return a negative/zero/positive integer corresponding to whether
	488	* this string is less than/equal to/greater than the second one
	489	* in code point order
	490	* @stable ICU 2.0
	491	*/
	492	inline int8_t compareCodePointOrder(int32_t start,
	493	int32_t length,
	494	const UnicodeString& srcText) const;
	495
	496	/**
	497	* Compare two Unicode strings in code point order.
	498	* The result may be different from the results of compare(), operator<, etc.
	499	* if supplementary characters are present:
	500	*
	501	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	502	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	503	* which means that they compare as less than some other BMP characters like U+feff.
	504	* This function compares Unicode strings in code point order.
	505	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	506	*
	507	* @param start The start offset in this string at which the compare operation begins.
	508	* @param length The number of code units from this string to compare.
	509	* @param srcText Another string to compare this one to.
	510	* @param srcStart The start offset in that string at which the compare operation begins.
	511	* @param srcLength The number of code units from that string to compare.
	512	* @return a negative/zero/positive integer corresponding to whether
	513	* this string is less than/equal to/greater than the second one
	514	* in code point order
	515	* @stable ICU 2.0
	516	*/
	517	inline int8_t compareCodePointOrder(int32_t start,
	518	int32_t length,
	519	const UnicodeString& srcText,
	520	int32_t srcStart,
	521	int32_t srcLength) const;
	522
	523	/**
	524	* Compare two Unicode strings in code point order.
	525	* The result may be different from the results of compare(), operator<, etc.
	526	* if supplementary characters are present:
	527	*
	528	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	529	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	530	* which means that they compare as less than some other BMP characters like U+feff.
	531	* This function compares Unicode strings in code point order.
	532	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	533	*
	534	* @param srcChars A pointer to another string to compare this one to.
	535	* @param srcLength The number of code units from that string to compare.
	536	* @return a negative/zero/positive integer corresponding to whether
	537	* this string is less than/equal to/greater than the second one
	538	* in code point order
	539	* @stable ICU 2.0
	540	*/
	541	inline int8_t compareCodePointOrder(const UChar *srcChars,
	542	int32_t srcLength) const;
	543
	544	/**
	545	* Compare two Unicode strings in code point order.
	546	* The result may be different from the results of compare(), operator<, etc.
	547	* if supplementary characters are present:
	548	*
	549	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	550	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	551	* which means that they compare as less than some other BMP characters like U+feff.
	552	* This function compares Unicode strings in code point order.
	553	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	554	*
	555	* @param start The start offset in this string at which the compare operation begins.
	556	* @param length The number of code units from this string to compare.
	557	* @param srcChars A pointer to another string to compare this one to.
	558	* @return a negative/zero/positive integer corresponding to whether
	559	* this string is less than/equal to/greater than the second one
	560	* in code point order
	561	* @stable ICU 2.0
	562	*/
	563	inline int8_t compareCodePointOrder(int32_t start,
	564	int32_t length,
	565	const UChar *srcChars) const;
	566
	567	/**
	568	* Compare two Unicode strings in code point order.
	569	* The result may be different from the results of compare(), operator<, etc.
	570	* if supplementary characters are present:
	571	*
	572	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	573	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	574	* which means that they compare as less than some other BMP characters like U+feff.
	575	* This function compares Unicode strings in code point order.
	576	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	577	*
	578	* @param start The start offset in this string at which the compare operation begins.
	579	* @param length The number of code units from this string to compare.
	580	* @param srcChars A pointer to another string to compare this one to.
	581	* @param srcStart The start offset in that string at which the compare operation begins.
	582	* @param srcLength The number of code units from that string to compare.
	583	* @return a negative/zero/positive integer corresponding to whether
	584	* this string is less than/equal to/greater than the second one
	585	* in code point order
	586	* @stable ICU 2.0
	587	*/
	588	inline int8_t compareCodePointOrder(int32_t start,
	589	int32_t length,
	590	const UChar *srcChars,
	591	int32_t srcStart,
	592	int32_t srcLength) const;
	593
	594	/**
	595	* Compare two Unicode strings in code point order.
	596	* The result may be different from the results of compare(), operator<, etc.
	597	* if supplementary characters are present:
	598	*
	599	* In UTF-16, supplementary characters (with code points U+10000 and above) are
	600	* stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
	601	* which means that they compare as less than some other BMP characters like U+feff.
	602	* This function compares Unicode strings in code point order.
	603	* If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
	604	*
	605	* @param start The start offset in this string at which the compare operation begins.
	606	* @param limit The offset after the last code unit from this string to compare.
	607	* @param srcText Another string to compare this one to.
	608	* @param srcStart The start offset in that string at which the compare operation begins.
	609	* @param srcLimit The offset after the last code unit from that string to compare.
	610	* @return a negative/zero/positive integer corresponding to whether
	611	* this string is less than/equal to/greater than the second one
	612	* in code point order
	613	* @stable ICU 2.0
	614	*/
	615	inline int8_t compareCodePointOrderBetween(int32_t start,
	616	int32_t limit,
	617	const UnicodeString& srcText,
	618	int32_t srcStart,
	619	int32_t srcLimit) const;
	620
	621	/**
	622	* Compare two strings case-insensitively using full case folding.
	623	* This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
	624	*
	625	* @param text Another string to compare this one to.
	626	* @param options A bit set of options:
	627	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	628	* Comparison in code unit order with default case folding.
	629	*
	630	* - U_COMPARE_CODE_POINT_ORDER
	631	* Set to choose code point order instead of code unit order
	632	* (see u_strCompare for details).
	633	*
	634	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	635	*
	636	* @return A negative, zero, or positive integer indicating the comparison result.
	637	* @stable ICU 2.0
	638	*/
	639	inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
	640
	641	/**
	642	* Compare two strings case-insensitively using full case folding.
	643	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
	644	*
	645	* @param start The start offset in this string at which the compare operation begins.
	646	* @param length The number of code units from this string to compare.
	647	* @param srcText Another string to compare this one to.
	648	* @param options A bit set of options:
	649	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	650	* Comparison in code unit order with default case folding.
	651	*
	652	* - U_COMPARE_CODE_POINT_ORDER
	653	* Set to choose code point order instead of code unit order
	654	* (see u_strCompare for details).
	655	*
	656	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	657	*
	658	* @return A negative, zero, or positive integer indicating the comparison result.
	659	* @stable ICU 2.0
	660	*/
	661	inline int8_t caseCompare(int32_t start,
	662	int32_t length,
	663	const UnicodeString& srcText,
	664	uint32_t options) const;
	665
	666	/**
	667	* Compare two strings case-insensitively using full case folding.
	668	* This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
	669	*
	670	* @param start The start offset in this string at which the compare operation begins.
	671	* @param length The number of code units from this string to compare.
	672	* @param srcText Another string to compare this one to.
	673	* @param srcStart The start offset in that string at which the compare operation begins.
	674	* @param srcLength The number of code units from that string to compare.
	675	* @param options A bit set of options:
	676	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	677	* Comparison in code unit order with default case folding.
	678	*
	679	* - U_COMPARE_CODE_POINT_ORDER
	680	* Set to choose code point order instead of code unit order
	681	* (see u_strCompare for details).
	682	*
	683	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	684	*
	685	* @return A negative, zero, or positive integer indicating the comparison result.
	686	* @stable ICU 2.0
	687	*/
	688	inline int8_t caseCompare(int32_t start,
	689	int32_t length,
	690	const UnicodeString& srcText,
	691	int32_t srcStart,
	692	int32_t srcLength,
	693	uint32_t options) const;
	694
	695	/**
	696	* Compare two strings case-insensitively using full case folding.
	697	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
	698	*
	699	* @param srcChars A pointer to another string to compare this one to.
	700	* @param srcLength The number of code units from that string to compare.
	701	* @param options A bit set of options:
	702	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	703	* Comparison in code unit order with default case folding.
	704	*
	705	* - U_COMPARE_CODE_POINT_ORDER
	706	* Set to choose code point order instead of code unit order
	707	* (see u_strCompare for details).
	708	*
	709	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	710	*
	711	* @return A negative, zero, or positive integer indicating the comparison result.
	712	* @stable ICU 2.0
	713	*/
	714	inline int8_t caseCompare(const UChar *srcChars,
	715	int32_t srcLength,
	716	uint32_t options) const;
	717
	718	/**
	719	* Compare two strings case-insensitively using full case folding.
	720	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
	721	*
	722	* @param start The start offset in this string at which the compare operation begins.
	723	* @param length The number of code units from this string to compare.
	724	* @param srcChars A pointer to another string to compare this one to.
	725	* @param options A bit set of options:
	726	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	727	* Comparison in code unit order with default case folding.
	728	*
	729	* - U_COMPARE_CODE_POINT_ORDER
	730	* Set to choose code point order instead of code unit order
	731	* (see u_strCompare for details).
	732	*
	733	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	734	*
	735	* @return A negative, zero, or positive integer indicating the comparison result.
	736	* @stable ICU 2.0
	737	*/
	738	inline int8_t caseCompare(int32_t start,
	739	int32_t length,
	740	const UChar *srcChars,
	741	uint32_t options) const;
	742
	743	/**
	744	* Compare two strings case-insensitively using full case folding.
	745	* This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
	746	*
	747	* @param start The start offset in this string at which the compare operation begins.
	748	* @param length The number of code units from this string to compare.
	749	* @param srcChars A pointer to another string to compare this one to.
	750	* @param srcStart The start offset in that string at which the compare operation begins.
	751	* @param srcLength The number of code units from that string to compare.
	752	* @param options A bit set of options:
	753	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	754	* Comparison in code unit order with default case folding.
	755	*
	756	* - U_COMPARE_CODE_POINT_ORDER
	757	* Set to choose code point order instead of code unit order
	758	* (see u_strCompare for details).
	759	*
	760	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	761	*
	762	* @return A negative, zero, or positive integer indicating the comparison result.
	763	* @stable ICU 2.0
	764	*/
	765	inline int8_t caseCompare(int32_t start,
	766	int32_t length,
	767	const UChar *srcChars,
	768	int32_t srcStart,
	769	int32_t srcLength,
	770	uint32_t options) const;
	771
	772	/**
	773	* Compare two strings case-insensitively using full case folding.
	774	* This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
	775	*
	776	* @param start The start offset in this string at which the compare operation begins.
	777	* @param limit The offset after the last code unit from this string to compare.
	778	* @param srcText Another string to compare this one to.
	779	* @param srcStart The start offset in that string at which the compare operation begins.
	780	* @param srcLimit The offset after the last code unit from that string to compare.
	781	* @param options A bit set of options:
	782	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
	783	* Comparison in code unit order with default case folding.
	784	*
	785	* - U_COMPARE_CODE_POINT_ORDER
	786	* Set to choose code point order instead of code unit order
	787	* (see u_strCompare for details).
	788	*
	789	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
	790	*
	791	* @return A negative, zero, or positive integer indicating the comparison result.
	792	* @stable ICU 2.0
	793	*/
	794	inline int8_t caseCompareBetween(int32_t start,
	795	int32_t limit,
	796	const UnicodeString& srcText,
	797	int32_t srcStart,
	798	int32_t srcLimit,
	799	uint32_t options) const;
	800
	801	/**
	802	* Determine if this starts with the characters in <TT>text</TT>
	803	* @param text The text to match.
	804	* @return TRUE if this starts with the characters in <TT>text</TT>,
	805	* FALSE otherwise
	806	* @stable ICU 2.0
	807	*/
	808	inline UBool startsWith(const UnicodeString& text) const;
	809
	810	/**
	811	* Determine if this starts with the characters in <TT>srcText</TT>
	812	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	813	* @param srcText The text to match.
	814	* @param srcStart the offset into <TT>srcText</TT> to start matching
	815	* @param srcLength the number of characters in <TT>srcText</TT> to match
	816	* @return TRUE if this starts with the characters in <TT>srcText</TT>,
	817	* FALSE otherwise
	818	* @stable ICU 2.0
	819	*/
	820	inline UBool startsWith(const UnicodeString& srcText,
	821	int32_t srcStart,
	822	int32_t srcLength) const;
	823
	824	/**
	825	* Determine if this starts with the characters in <TT>srcChars</TT>
	826	* @param srcChars The characters to match.
	827	* @param srcLength the number of characters in <TT>srcChars</TT>
	828	* @return TRUE if this starts with the characters in <TT>srcChars</TT>,
	829	* FALSE otherwise
	830	* @stable ICU 2.0
	831	*/
	832	inline UBool startsWith(const UChar *srcChars,
	833	int32_t srcLength) const;
	834
	835	/**
	836	* Determine if this starts with the characters in <TT>srcChars</TT>
	837	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	838	* @param srcChars The characters to match.
	839	* @param srcStart the offset into <TT>srcChars</TT> to start matching
	840	* @param srcLength the number of characters in <TT>srcChars</TT> to match
	841	* @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
	842	* @stable ICU 2.0
	843	*/
	844	inline UBool startsWith(const UChar *srcChars,
	845	int32_t srcStart,
	846	int32_t srcLength) const;
	847
	848	/**
	849	* Determine if this ends with the characters in <TT>text</TT>
	850	* @param text The text to match.
	851	* @return TRUE if this ends with the characters in <TT>text</TT>,
	852	* FALSE otherwise
	853	* @stable ICU 2.0
	854	*/
	855	inline UBool endsWith(const UnicodeString& text) const;
	856
	857	/**
	858	* Determine if this ends with the characters in <TT>srcText</TT>
	859	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	860	* @param srcText The text to match.
	861	* @param srcStart the offset into <TT>srcText</TT> to start matching
	862	* @param srcLength the number of characters in <TT>srcText</TT> to match
	863	* @return TRUE if this ends with the characters in <TT>srcText</TT>,
	864	* FALSE otherwise
	865	* @stable ICU 2.0
	866	*/
	867	inline UBool endsWith(const UnicodeString& srcText,
	868	int32_t srcStart,
	869	int32_t srcLength) const;
	870
	871	/**
	872	* Determine if this ends with the characters in <TT>srcChars</TT>
	873	* @param srcChars The characters to match.
	874	* @param srcLength the number of characters in <TT>srcChars</TT>
	875	* @return TRUE if this ends with the characters in <TT>srcChars</TT>,
	876	* FALSE otherwise
	877	* @stable ICU 2.0
	878	*/
	879	inline UBool endsWith(const UChar *srcChars,
	880	int32_t srcLength) const;
	881
	882	/**
	883	* Determine if this ends with the characters in <TT>srcChars</TT>
	884	* in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	885	* @param srcChars The characters to match.
	886	* @param srcStart the offset into <TT>srcChars</TT> to start matching
	887	* @param srcLength the number of characters in <TT>srcChars</TT> to match
	888	* @return TRUE if this ends with the characters in <TT>srcChars</TT>,
	889	* FALSE otherwise
	890	* @stable ICU 2.0
	891	*/
	892	inline UBool endsWith(const UChar *srcChars,
	893	int32_t srcStart,
	894	int32_t srcLength) const;
	895
	896
	897	/* Searching - bitwise only */
	898
	899	/**
	900	* Locate in this the first occurrence of the characters in <TT>text</TT>,
	901	* using bitwise comparison.
	902	* @param text The text to search for.
	903	* @return The offset into this of the start of <TT>text</TT>,
	904	* or -1 if not found.
	905	* @stable ICU 2.0
	906	*/
	907	inline int32_t indexOf(const UnicodeString& text) const;
	908
	909	/**
	910	* Locate in this the first occurrence of the characters in <TT>text</TT>
	911	* starting at offset <TT>start</TT>, using bitwise comparison.
	912	* @param text The text to search for.
	913	* @param start The offset at which searching will start.
	914	* @return The offset into this of the start of <TT>text</TT>,
	915	* or -1 if not found.
	916	* @stable ICU 2.0
	917	*/
	918	inline int32_t indexOf(const UnicodeString& text,
	919	int32_t start) const;
	920
	921	/**
	922	* Locate in this the first occurrence in the range
	923	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	924	* in <TT>text</TT>, using bitwise comparison.
	925	* @param text The text to search for.
	926	* @param start The offset at which searching will start.
	927	* @param length The number of characters to search
	928	* @return The offset into this of the start of <TT>text</TT>,
	929	* or -1 if not found.
	930	* @stable ICU 2.0
	931	*/
	932	inline int32_t indexOf(const UnicodeString& text,
	933	int32_t start,
	934	int32_t length) const;
	935
	936	/**
	937	* Locate in this the first occurrence in the range
	938	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	939	* in <TT>srcText</TT> in the range
	940	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
	941	* using bitwise comparison.
	942	* @param srcText The text to search for.
	943	* @param srcStart the offset into <TT>srcText</TT> at which
	944	* to start matching
	945	* @param srcLength the number of characters in <TT>srcText</TT> to match
	946	* @param start the offset into this at which to start matching
	947	* @param length the number of characters in this to search
	948	* @return The offset into this of the start of <TT>srcText</TT>,
	949	* or -1 if not found.
	950	* @stable ICU 2.0
	951	*/
	952	inline int32_t indexOf(const UnicodeString& srcText,
	953	int32_t srcStart,
	954	int32_t srcLength,
	955	int32_t start,
	956	int32_t length) const;
	957
	958	/**
	959	* Locate in this the first occurrence of the characters in
	960	* <TT>srcChars</TT>
	961	* starting at offset <TT>start</TT>, using bitwise comparison.
	962	* @param srcChars The text to search for.
	963	* @param srcLength the number of characters in <TT>srcChars</TT> to match
	964	* @param start the offset into this at which to start matching
	965	* @return The offset into this of the start of <TT>srcChars</TT>,
	966	* or -1 if not found.
	967	* @stable ICU 2.0
	968	*/
	969	inline int32_t indexOf(const UChar *srcChars,
	970	int32_t srcLength,
	971	int32_t start) const;
	972
	973	/**
	974	* Locate in this the first occurrence in the range
	975	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	976	* in <TT>srcChars</TT>, using bitwise comparison.
	977	* @param srcChars The text to search for.
	978	* @param srcLength the number of characters in <TT>srcChars</TT>
	979	* @param start The offset at which searching will start.
	980	* @param length The number of characters to search
	981	* @return The offset into this of the start of <TT>srcChars</TT>,
	982	* or -1 if not found.
	983	* @stable ICU 2.0
	984	*/
	985	inline int32_t indexOf(const UChar *srcChars,
	986	int32_t srcLength,
	987	int32_t start,
	988	int32_t length) const;
	989
	990	/**
	991	* Locate in this the first occurrence in the range
	992	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	993	* in <TT>srcChars</TT> in the range
	994	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
	995	* using bitwise comparison.
	996	* @param srcChars The text to search for.
	997	* @param srcStart the offset into <TT>srcChars</TT> at which
	998	* to start matching
	999	* @param srcLength the number of characters in <TT>srcChars</TT> to match
	1000	* @param start the offset into this at which to start matching
	1001	* @param length the number of characters in this to search
	1002	* @return The offset into this of the start of <TT>srcChars</TT>,
	1003	* or -1 if not found.
	1004	* @stable ICU 2.0
	1005	*/
	1006	int32_t indexOf(const UChar *srcChars,
	1007	int32_t srcStart,
	1008	int32_t srcLength,
	1009	int32_t start,
	1010	int32_t length) const;
	1011
	1012	/**
	1013	* Locate in this the first occurrence of the BMP code point <code>c</code>,
	1014	* using bitwise comparison.
	1015	* @param c The code unit to search for.
	1016	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1017	* @stable ICU 2.0
	1018	*/
	1019	inline int32_t indexOf(UChar c) const;
	1020
	1021	/**
	1022	* Locate in this the first occurrence of the code point <TT>c</TT>,
	1023	* using bitwise comparison.
	1024	*
	1025	* @param c The code point to search for.
	1026	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1027	* @stable ICU 2.0
	1028	*/
	1029	inline int32_t indexOf(UChar32 c) const;
	1030
	1031	/**
	1032	* Locate in this the first occurrence of the BMP code point <code>c</code>,
	1033	* starting at offset <TT>start</TT>, using bitwise comparison.
	1034	* @param c The code unit to search for.
	1035	* @param start The offset at which searching will start.
	1036	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1037	* @stable ICU 2.0
	1038	*/
	1039	inline int32_t indexOf(UChar c,
	1040	int32_t start) const;
	1041
	1042	/**
	1043	* Locate in this the first occurrence of the code point <TT>c</TT>
	1044	* starting at offset <TT>start</TT>, using bitwise comparison.
	1045	*
	1046	* @param c The code point to search for.
	1047	* @param start The offset at which searching will start.
	1048	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1049	* @stable ICU 2.0
	1050	*/
	1051	inline int32_t indexOf(UChar32 c,
	1052	int32_t start) const;
	1053
	1054	/**
	1055	* Locate in this the first occurrence of the BMP code point <code>c</code>
	1056	* in the range [<TT>start</TT>, <TT>start + length</TT>),
	1057	* using bitwise comparison.
	1058	* @param c The code unit to search for.
	1059	* @param start the offset into this at which to start matching
	1060	* @param length the number of characters in this to search
	1061	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1062	* @stable ICU 2.0
	1063	*/
	1064	inline int32_t indexOf(UChar c,
	1065	int32_t start,
	1066	int32_t length) const;
	1067
	1068	/**
	1069	* Locate in this the first occurrence of the code point <TT>c</TT>
	1070	* in the range [<TT>start</TT>, <TT>start + length</TT>),
	1071	* using bitwise comparison.
	1072	*
	1073	* @param c The code point to search for.
	1074	* @param start the offset into this at which to start matching
	1075	* @param length the number of characters in this to search
	1076	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1077	* @stable ICU 2.0
	1078	*/
	1079	inline int32_t indexOf(UChar32 c,
	1080	int32_t start,
	1081	int32_t length) const;
	1082
	1083	/**
	1084	* Locate in this the last occurrence of the characters in <TT>text</TT>,
	1085	* using bitwise comparison.
	1086	* @param text The text to search for.
	1087	* @return The offset into this of the start of <TT>text</TT>,
	1088	* or -1 if not found.
	1089	* @stable ICU 2.0
	1090	*/
	1091	inline int32_t lastIndexOf(const UnicodeString& text) const;
	1092
	1093	/**
	1094	* Locate in this the last occurrence of the characters in <TT>text</TT>
	1095	* starting at offset <TT>start</TT>, using bitwise comparison.
	1096	* @param text The text to search for.
	1097	* @param start The offset at which searching will start.
	1098	* @return The offset into this of the start of <TT>text</TT>,
	1099	* or -1 if not found.
	1100	* @stable ICU 2.0
	1101	*/
	1102	inline int32_t lastIndexOf(const UnicodeString& text,
	1103	int32_t start) const;
	1104
	1105	/**
	1106	* Locate in this the last occurrence in the range
	1107	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	1108	* in <TT>text</TT>, using bitwise comparison.
	1109	* @param text The text to search for.
	1110	* @param start The offset at which searching will start.
	1111	* @param length The number of characters to search
	1112	* @return The offset into this of the start of <TT>text</TT>,
	1113	* or -1 if not found.
	1114	* @stable ICU 2.0
	1115	*/
	1116	inline int32_t lastIndexOf(const UnicodeString& text,
	1117	int32_t start,
	1118	int32_t length) const;
	1119
	1120	/**
	1121	* Locate in this the last occurrence in the range
	1122	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	1123	* in <TT>srcText</TT> in the range
	1124	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
	1125	* using bitwise comparison.
	1126	* @param srcText The text to search for.
	1127	* @param srcStart the offset into <TT>srcText</TT> at which
	1128	* to start matching
	1129	* @param srcLength the number of characters in <TT>srcText</TT> to match
	1130	* @param start the offset into this at which to start matching
	1131	* @param length the number of characters in this to search
	1132	* @return The offset into this of the start of <TT>srcText</TT>,
	1133	* or -1 if not found.
	1134	* @stable ICU 2.0
	1135	*/
	1136	inline int32_t lastIndexOf(const UnicodeString& srcText,
	1137	int32_t srcStart,
	1138	int32_t srcLength,
	1139	int32_t start,
	1140	int32_t length) const;
	1141
	1142	/**
	1143	* Locate in this the last occurrence of the characters in <TT>srcChars</TT>
	1144	* starting at offset <TT>start</TT>, using bitwise comparison.
	1145	* @param srcChars The text to search for.
	1146	* @param srcLength the number of characters in <TT>srcChars</TT> to match
	1147	* @param start the offset into this at which to start matching
	1148	* @return The offset into this of the start of <TT>srcChars</TT>,
	1149	* or -1 if not found.
	1150	* @stable ICU 2.0
	1151	*/
	1152	inline int32_t lastIndexOf(const UChar *srcChars,
	1153	int32_t srcLength,
	1154	int32_t start) const;
	1155
	1156	/**
	1157	* Locate in this the last occurrence in the range
	1158	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	1159	* in <TT>srcChars</TT>, using bitwise comparison.
	1160	* @param srcChars The text to search for.
	1161	* @param srcLength the number of characters in <TT>srcChars</TT>
	1162	* @param start The offset at which searching will start.
	1163	* @param length The number of characters to search
	1164	* @return The offset into this of the start of <TT>srcChars</TT>,
	1165	* or -1 if not found.
	1166	* @stable ICU 2.0
	1167	*/
	1168	inline int32_t lastIndexOf(const UChar *srcChars,
	1169	int32_t srcLength,
	1170	int32_t start,
	1171	int32_t length) const;
	1172
	1173	/**
	1174	* Locate in this the last occurrence in the range
	1175	* [<TT>start</TT>, <TT>start + length</TT>) of the characters
	1176	* in <TT>srcChars</TT> in the range
	1177	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
	1178	* using bitwise comparison.
	1179	* @param srcChars The text to search for.
	1180	* @param srcStart the offset into <TT>srcChars</TT> at which
	1181	* to start matching
	1182	* @param srcLength the number of characters in <TT>srcChars</TT> to match
	1183	* @param start the offset into this at which to start matching
	1184	* @param length the number of characters in this to search
	1185	* @return The offset into this of the start of <TT>srcChars</TT>,
	1186	* or -1 if not found.
	1187	* @stable ICU 2.0
	1188	*/
	1189	int32_t lastIndexOf(const UChar *srcChars,
	1190	int32_t srcStart,
	1191	int32_t srcLength,
	1192	int32_t start,
	1193	int32_t length) const;
	1194
	1195	/**
	1196	* Locate in this the last occurrence of the BMP code point <code>c</code>,
	1197	* using bitwise comparison.
	1198	* @param c The code unit to search for.
	1199	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1200	* @stable ICU 2.0
	1201	*/
	1202	inline int32_t lastIndexOf(UChar c) const;
	1203
	1204	/**
	1205	* Locate in this the last occurrence of the code point <TT>c</TT>,
	1206	* using bitwise comparison.
	1207	*
	1208	* @param c The code point to search for.
	1209	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1210	* @stable ICU 2.0
	1211	*/
	1212	inline int32_t lastIndexOf(UChar32 c) const;
	1213
	1214	/**
	1215	* Locate in this the last occurrence of the BMP code point <code>c</code>
	1216	* starting at offset <TT>start</TT>, using bitwise comparison.
	1217	* @param c The code unit to search for.
	1218	* @param start The offset at which searching will start.
	1219	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1220	* @stable ICU 2.0
	1221	*/
	1222	inline int32_t lastIndexOf(UChar c,
	1223	int32_t start) const;
	1224
	1225	/**
	1226	* Locate in this the last occurrence of the code point <TT>c</TT>
	1227	* starting at offset <TT>start</TT>, using bitwise comparison.
	1228	*
	1229	* @param c The code point to search for.
	1230	* @param start The offset at which searching will start.
	1231	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1232	* @stable ICU 2.0
	1233	*/
	1234	inline int32_t lastIndexOf(UChar32 c,
	1235	int32_t start) const;
	1236
	1237	/**
	1238	* Locate in this the last occurrence of the BMP code point <code>c</code>
	1239	* in the range [<TT>start</TT>, <TT>start + length</TT>),
	1240	* using bitwise comparison.
	1241	* @param c The code unit to search for.
	1242	* @param start the offset into this at which to start matching
	1243	* @param length the number of characters in this to search
	1244	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1245	* @stable ICU 2.0
	1246	*/
	1247	inline int32_t lastIndexOf(UChar c,
	1248	int32_t start,
	1249	int32_t length) const;
	1250
	1251	/**
	1252	* Locate in this the last occurrence of the code point <TT>c</TT>
	1253	* in the range [<TT>start</TT>, <TT>start + length</TT>),
	1254	* using bitwise comparison.
	1255	*
	1256	* @param c The code point to search for.
	1257	* @param start the offset into this at which to start matching
	1258	* @param length the number of characters in this to search
	1259	* @return The offset into this of <TT>c</TT>, or -1 if not found.
	1260	* @stable ICU 2.0
	1261	*/
	1262	inline int32_t lastIndexOf(UChar32 c,
	1263	int32_t start,
	1264	int32_t length) const;
	1265
	1266
	1267	/* Character access */
	1268
	1269	/**
	1270	* Return the code unit at offset <tt>offset</tt>.
	1271	* If the offset is not valid (0..length()-1) then U+ffff is returned.
	1272	* @param offset a valid offset into the text
	1273	* @return the code unit at offset <tt>offset</tt>
	1274	* or 0xffff if the offset is not valid for this string
	1275	* @stable ICU 2.0
	1276	*/
	1277	inline UChar charAt(int32_t offset) const;
	1278
	1279	/**
	1280	* Return the code unit at offset <tt>offset</tt>.
	1281	* If the offset is not valid (0..length()-1) then U+ffff is returned.
	1282	* @param offset a valid offset into the text
	1283	* @return the code unit at offset <tt>offset</tt>
	1284	* @stable ICU 2.0
	1285	*/
	1286	inline UChar operator[] (int32_t offset) const;
	1287
	1288	/**
	1289	* Return the code point that contains the code unit
	1290	* at offset <tt>offset</tt>.
	1291	* If the offset is not valid (0..length()-1) then U+ffff is returned.
	1292	* @param offset a valid offset into the text
	1293	* that indicates the text offset of any of the code units
	1294	* that will be assembled into a code point (21-bit value) and returned
	1295	* @return the code point of text at <tt>offset</tt>
	1296	* or 0xffff if the offset is not valid for this string
	1297	* @stable ICU 2.0
	1298	*/
	1299	UChar32 char32At(int32_t offset) const;
	1300
	1301	/**
	1302	* Adjust a random-access offset so that
	1303	* it points to the beginning of a Unicode character.
	1304	* The offset that is passed in points to
	1305	* any code unit of a code point,
	1306	* while the returned offset will point to the first code unit
	1307	* of the same code point.
	1308	* In UTF-16, if the input offset points to a second surrogate
	1309	* of a surrogate pair, then the returned offset will point
	1310	* to the first surrogate.
	1311	* @param offset a valid offset into one code point of the text
	1312	* @return offset of the first code unit of the same code point
	1313	* @see U16_SET_CP_START
	1314	* @stable ICU 2.0
	1315	*/
	1316	int32_t getChar32Start(int32_t offset) const;
	1317
	1318	/**
	1319	* Adjust a random-access offset so that
	1320	* it points behind a Unicode character.
	1321	* The offset that is passed in points behind
	1322	* any code unit of a code point,
	1323	* while the returned offset will point behind the last code unit
	1324	* of the same code point.
	1325	* In UTF-16, if the input offset points behind the first surrogate
	1326	* (i.e., to the second surrogate)
	1327	* of a surrogate pair, then the returned offset will point
	1328	* behind the second surrogate (i.e., to the first code unit after the pair).
	1329	* @param offset a valid offset after any code unit of a code point of the text
	1330	* @return offset of the first code unit after the same code point
	1331	* @see U16_SET_CP_LIMIT
	1332	* @stable ICU 2.0
	1333	*/
	1334	int32_t getChar32Limit(int32_t offset) const;
	1335
	1336	/**
	1337	* Move the code unit index along the string by delta code points.
	1338	* Interpret the input index as a code unit-based offset into the string,
	1339	* move the index forward or backward by delta code points, and
	1340	* return the resulting index.
	1341	* The input index should point to the first code unit of a code point,
	1342	* if there is more than one.
	1343	*
	1344	* Both input and output indexes are code unit-based as for all
	1345	* string indexes/offsets in ICU (and other libraries, like MBCS char*).
	1346	* If delta<0 then the index is moved backward (toward the start of the string).
	1347	* If delta>0 then the index is moved forward (toward the end of the string).
	1348	*
	1349	* This behaves like CharacterIterator::move32(delta, kCurrent).
	1350	*
	1351	* Behavior for out-of-bounds indexes:
	1352	* <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
	1353	* if the input index<0 then it is pinned to 0;
	1354	* if it is index>length() then it is pinned to length().
	1355	* Afterwards, the index is moved by <code>delta</code> code points
	1356	* forward or backward,
	1357	* but no further backward than to 0 and no further forward than to length().
	1358	* The resulting index return value will be in between 0 and length(), inclusively.
	1359	*
	1360	* Examples:
	1361	* <pre>
	1362	* // s has code points 'a' U+10000 'b' U+10ffff U+2029
	1363	* UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
	1364	*
	1365	* // initial index: position of U+10000
	1366	* int32_t index=1;
	1367	*
	1368	* // the following examples will all result in index==4, position of U+10ffff
	1369	*
	1370	* // skip 2 code points from some position in the string
	1371	* index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
	1372	*
	1373	* // go to the 3rd code point from the start of s (0-based)
	1374	* index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
	1375	*
	1376	* // go to the next-to-last code point of s
	1377	* index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
	1378	* </pre>
	1379	*
	1380	* @param index input code unit index
	1381	* @param delta (signed) code point count to move the index forward or backward
	1382	* in the string
	1383	* @return the resulting code unit index
	1384	* @stable ICU 2.0
	1385	*/
	1386	int32_t moveIndex32(int32_t index, int32_t delta) const;
	1387
	1388	/* Substring extraction */
	1389
	1390	/**
	1391	* Copy the characters in the range
	1392	* [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
	1393	* beginning at <tt>dstStart</tt>.
	1394	* If the string aliases to <code>dst</code> itself as an external buffer,
	1395	* then extract() will not copy the contents.
	1396	*
	1397	* @param start offset of first character which will be copied into the array
	1398	* @param length the number of characters to extract
	1399	* @param dst array in which to copy characters. The length of <tt>dst</tt>
	1400	* must be at least (<tt>dstStart + length</tt>).
	1401	* @param dstStart the offset in <TT>dst</TT> where the first character
	1402	* will be extracted
	1403	* @stable ICU 2.0
	1404	*/
	1405	inline void extract(int32_t start,
	1406	int32_t length,
	1407	UChar *dst,
	1408	int32_t dstStart = 0) const;
	1409
	1410	/**
	1411	* Copy the contents of the string into dest.
	1412	* This is a convenience function that
	1413	* checks if there is enough space in dest,
	1414	* extracts the entire string if possible,
	1415	* and NUL-terminates dest if possible.
	1416	*
	1417	* If the string fits into dest but cannot be NUL-terminated
	1418	* (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
	1419	* If the string itself does not fit into dest
	1420	* (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
	1421	*
	1422	* If the string aliases to <code>dest</code> itself as an external buffer,
	1423	* then extract() will not copy the contents.
	1424	*
	1425	* @param dest Destination string buffer.
	1426	* @param destCapacity Number of UChars available at dest.
	1427	* @param errorCode ICU error code.
	1428	* @return length()
	1429	* @stable ICU 2.0
	1430	*/
	1431	int32_t
	1432	extract(UChar *dest, int32_t destCapacity,
	1433	UErrorCode &errorCode) const;
	1434
	1435	/**
	1436	* Copy the characters in the range
	1437	* [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
	1438	* <tt>target</tt>.
	1439	* @param start offset of first character which will be copied
	1440	* @param length the number of characters to extract
	1441	* @param target UnicodeString into which to copy characters.
	1443	* @stable ICU 2.0
	1444	*/
	1445	inline void extract(int32_t start,
	1446	int32_t length,
	1447	UnicodeString& target) const;
	1448
	1449	/**
	1450	* Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
	1451	* into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
	1452	* @param start offset of first character which will be copied into the array
	1453	* @param limit offset immediately following the last character to be copied
	1454	* @param dst array in which to copy characters. The length of <tt>dst</tt>
	1455	* must be at least (<tt>dstStart + (limit - start)</tt>).
	1456	* @param dstStart the offset in <TT>dst</TT> where the first character
	1457	* will be extracted
	1458	* @stable ICU 2.0
	1459	*/
	1460	inline void extractBetween(int32_t start,
	1461	int32_t limit,
	1462	UChar *dst,
	1463	int32_t dstStart = 0) const;
	1464
	1465	/**
	1466	* Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
	1467	* into the UnicodeString <tt>target</tt>. Replaceable API.
	1468	* @param start offset of first character which will be copied
	1469	* @param limit offset immediately following the last character to be copied
	1470	* @param target UnicodeString into which to copy characters.
	1472	* @stable ICU 2.0
	1473	*/
	1474	virtual void extractBetween(int32_t start,
	1475	int32_t limit,
	1476	UnicodeString& target) const;
	1477
	1478	/**
	1479	* Copy the characters in the range
	1480	* [<tt>start</TT>, <tt>start + length</TT>) into an array of characters.
	1481	* All characters must be invariant (see utypes.h).
	1482	* Use US_INV as the last, signature-distinguishing parameter.
	1483	*
	1484	* This function does not write any more than <code>targetLength</code>
	1485	* characters but returns the length of the entire output string
	1486	* so that one can allocate a larger buffer and call the function again
	1487	* if necessary.
	1488	* The output string is NUL-terminated if possible.
	1489	*
	1490	* @param start offset of first character which will be copied
	1491	* @param startLength the number of characters to extract
	1492	* @param target the target buffer for extraction, can be NULL
	1493	* if targetCapacity is 0
	1494	* @param targetCapacity the length of the target buffer
	1495	* @param inv Signature-distinguishing parameter, use US_INV.
	1496	* @return the output string length, not including the terminating NUL
	1497	* @stable ICU 3.2
	1498	*/
	1499	int32_t extract(int32_t start,
	1500	int32_t startLength,
	1501	char *target,
	1502	int32_t targetCapacity,
	1503	enum EInvariant inv) const;
	1504
	1505	#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
	1506
	1507	/**
	1508	* Copy the characters in the range
	1509	* [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
	1510	* in the platform's default codepage.
	1511	* This function does not write any more than <code>targetLength</code>
	1512	* characters but returns the length of the entire output string
	1513	* so that one can allocate a larger buffer and call the function again
	1514	* if necessary.
	1515	* The output string is NUL-terminated if possible.
	1516	*
	1517	* @param start offset of first character which will be copied
	1518	* @param startLength the number of characters to extract
	1519	* @param target the target buffer for extraction
	1520	* @param targetLength the length of the target buffer
	1521	* If <TT>target</TT> is NULL, then the number of bytes required for
	1522	* <TT>target</TT> is returned.
	1523	* @return the output string length, not including the terminating NUL
	1524	* @stable ICU 2.0
	1525	*/
	1526	int32_t extract(int32_t start,
	1527	int32_t startLength,
	1528	char *target,
	1529	uint32_t targetLength) const;
	1530
	1531	#endif
	1532
	1533	#if !UCONFIG_NO_CONVERSION
	1534
	1535	/**
	1536	* Copy the characters in the range
	1537	* [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
	1538	* in a specified codepage.
	1539	* The output string is NUL-terminated.
	1540	*
	1541	* Recommendation: For invariant-character strings use
	1542	* extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
	1543	* because it avoids object code dependencies of UnicodeString on
	1544	* the conversion code.
	1545	*
	1546	* @param start offset of first character which will be copied
	1547	* @param startLength the number of characters to extract
	1548	* @param target the target buffer for extraction
	1549	* @param codepage the desired codepage for the characters. 0 has
	1550	* the special meaning of the default codepage.
	1551	* If <code>codepage</code> is an empty string (<code>""</code>),
	1552	* then a simple conversion is performed on the codepage-invariant
	1553	* subset ("invariant characters") of the platform encoding. See utypes.h.
	1554	* If <TT>target</TT> is NULL, then the number of bytes required for
	1555	* <TT>target</TT> is returned. It is assumed that the target is big enough
	1556	* to fit all of the characters.
	1557	* @return the output string length, not including the terminating NUL
	1558	* @stable ICU 2.0
	1559	*/
	1560	inline int32_t extract(int32_t start,
	1561	int32_t startLength,
	1562	char *target,
	1563	const char *codepage = 0) const;
	1564
	1565	/**
	1566	* Copy the characters in the range
	1567	* [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
	1568	* in a specified codepage.
	1569	* This function does not write any more than <code>targetLength</code>
	1570	* characters but returns the length of the entire output string
	1571	* so that one can allocate a larger buffer and call the function again
	1572	* if necessary.
	1573	* The output string is NUL-terminated if possible.
	1574	*
	1575	* Recommendation: For invariant-character strings use
	1576	* extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
	1577	* because it avoids object code dependencies of UnicodeString on
	1578	* the conversion code.
	1579	*
	1580	* @param start offset of first character which will be copied
	1581	* @param startLength the number of characters to extract
	1582	* @param target the target buffer for extraction
	1583	* @param targetLength the length of the target buffer
	1584	* @param codepage the desired codepage for the characters. 0 has
	1585	* the special meaning of the default codepage.
	1586	* If <code>codepage</code> is an empty string (<code>""</code>),
	1587	* then a simple conversion is performed on the codepage-invariant
	1588	* subset ("invariant characters") of the platform encoding. See utypes.h.
	1589	* If <TT>target</TT> is NULL, then the number of bytes required for
	1590	* <TT>target</TT> is returned.
	1591	* @return the output string length, not including the terminating NUL
	1592	* @stable ICU 2.0
	1593	*/
	1594	int32_t extract(int32_t start,
	1595	int32_t startLength,
	1596	char *target,
	1597	uint32_t targetLength,
	1598	const char *codepage) const;
	1599
	1600	/**
	1601	* Convert the UnicodeString into a codepage string using an existing UConverter.
	1602	* The output string is NUL-terminated if possible.
	1603	*
	1604	* This function avoids the overhead of opening and closing a converter if
	1605	* multiple strings are extracted.
	1606	*
	1607	* @param dest destination string buffer, can be NULL if destCapacity==0
	1608	* @param destCapacity the number of chars available at dest
	1609	* @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
	1610	* or NULL for the default converter
	1611	* @param errorCode normal ICU error code
	1612	* @return the length of the output string, not counting the terminating NUL;
	1613	* if the length is greater than destCapacity, then the string will not fit
	1614	* and a buffer of the indicated length would need to be passed in
	1615	* @stable ICU 2.0
	1616	*/
	1617	int32_t extract(char *dest, int32_t destCapacity,
	1618	UConverter *cnv,
	1619	UErrorCode &errorCode) const;
	1620
	1621	#endif
	1622
	1623	/**
	1624	* Create a temporary substring for the specified range.
	1625	* Unlike the substring constructor and setTo() functions,
	1626	* the object returned here will be a read-only alias (using getBuffer())
	1627	* rather than copying the text.
	1628	* As a result, this substring operation is much faster but requires
	1629	* that the original string not be modified or deleted during the lifetime
	1630	* of the returned substring object.
	1631	* @param start offset of the first character visible in the substring
	1632	* @param length length of the substring
	1633	* @return a read-only alias UnicodeString object for the substring
	1634	* @stable ICU 4.4
	1635	*/
	1636	UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
	1637
	1638	/**
	1639	* Create a temporary substring for the specified range.
	1640	* Same as tempSubString(start, length) except that the substring range
	1641	* is specified as a (start, limit) pair (with an exclusive limit index)
	1642	* rather than a (start, length) pair.
	1643	* @param start offset of the first character visible in the substring
	1644	* @param limit offset immediately following the last character visible in the substring
	1645	* @return a read-only alias UnicodeString object for the substring
	1646	* @stable ICU 4.4
	1647	*/
	1648	inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
	1649
	1650	/**
	1651	* Convert the UnicodeString to UTF-8 and write the result
	1652	* to a ByteSink. This is called by toUTF8String().
	1653	* Unpaired surrogates are replaced with U+FFFD.
	1654	* Calls u_strToUTF8WithSub().
	1655	*
	1656	* @param sink A ByteSink to which the UTF-8 version of the string is written.
	1657	* sink.Flush() is called at the end.
	1658	* @stable ICU 4.2
	1659	* @see toUTF8String
	1660	*/
	1661	void toUTF8(ByteSink &sink) const;
	1662
	1663	#if U_HAVE_STD_STRING
	1664
	1665	/**
	1666	* Convert the UnicodeString to UTF-8 and append the result
	1667	* to a standard string.
	1668	* Unpaired surrogates are replaced with U+FFFD.
	1669	* Calls toUTF8().
	1670	*
	1671	* @param result A standard string (or a compatible object)
	1672	* to which the UTF-8 version of the string is appended.
	1673	* @return The string object.
	1674	* @stable ICU 4.2
	1675	* @see toUTF8
	1676	*/
	1677	template<typename StringClass>
	1678	StringClass &toUTF8String(StringClass &result) const {
	1679	StringByteSink<StringClass> utf8Sink(&result); // sink bound to the caller's string object
	1680	toUTF8(utf8Sink); // emit this string's UTF-8 form through the sink
	1681	return result; // hand back the same object that was passed in
	1682	}
	1683
	1684	#endif
	1685
	1686	/**
	1687	* Convert the UnicodeString to UTF-32.
	1688	* Unpaired surrogates are replaced with U+FFFD.
	1689	* Calls u_strToUTF32WithSub().
	1690	*
	1691	* @param utf32 destination string buffer, can be NULL if capacity==0
	1692	* @param capacity the number of UChar32s available at utf32
	1693	* @param errorCode Standard ICU error code. Its input value must
	1694	* pass the U_SUCCESS() test, or else the function returns
	1695	* immediately. Check for U_FAILURE() on output or use with
	1696	* function chaining. (See User Guide for details.)
	1697	* @return The length of the UTF-32 string.
	1698	* @see fromUTF32
	1699	* @stable ICU 4.2
	1700	*/
	1701	int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
	1702
	1703	/* Length operations */
	1704
	1705	/**
	1706	* Return the length of the UnicodeString object.
	1707	* The length is the number of UChar code units in the UnicodeString.
	1708	* If you want the number of code points, please use countChar32().
	1709	* @return the length of the UnicodeString object
	1710	* @see countChar32
	1711	* @stable ICU 2.0
	1712	*/
	1713	inline int32_t length(void) const;
	1714
	1715	/**
	1716	* Count Unicode code points in the length UChar code units of the string.
	1717	* A code point may occupy either one or two UChar code units.
	1718	* Counting code points involves reading all code units.
	1719	*
	1720	* This function is basically the inverse of moveIndex32().
	1721	*
	1722	* @param start the index of the first code unit to check
	1723	* @param length the number of UChar code units to check
	1724	* @return the number of code points in the specified code units
	1725	* @see length
	1726	* @stable ICU 2.0
	1727	*/
	1728	int32_t
	1729	countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
	1730
	1731	/**
	1732	* Check if the length UChar code units of the string
	1733	* contain more Unicode code points than a certain number.
	1734	* This is more efficient than counting all code points in this part of the string
	1735	* and comparing that number with a threshold.
	1736	* This function may not need to scan the string at all if the length
	1737	* falls within a certain range, and
	1738	* never needs to count more than 'number+1' code points.
	1739	* Logically equivalent to (countChar32(start, length)>number).
	1740	* A Unicode code point may occupy either one or two UChar code units.
	1741	*
	1742	* @param start the index of the first code unit to check (0 for the entire string)
	1743	* @param length the number of UChar code units to check
	1744	* (use INT32_MAX for the entire string; remember that start/length
	1745	* values are pinned)
	1746	* @param number The number of code points in the (sub)string is compared against
	1747	* the 'number' parameter.
	1748	* @return Boolean value for whether the string contains more Unicode code points
	1749	* than 'number'. Same as (u_countChar32(s, length)>number).
	1750	* @see countChar32
	1751	* @see u_strHasMoreChar32Than
	1752	* @stable ICU 2.4
	1753	*/
	1754	UBool
	1755	hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
	1756
	1757	/**
	1758	* Determine if this string is empty.
	1759	* @return TRUE if this string contains 0 characters, FALSE otherwise.
	1760	* @stable ICU 2.0
	1761	*/
	1762	inline UBool isEmpty(void) const;
	1763
	1764	/**
	1765	* Return the capacity of the internal buffer of the UnicodeString object.
	1766	* This is useful together with the getBuffer functions.
	1767	* See there for details.
	1768	*
	1769	* @return the number of UChars available in the internal buffer
	1770	* @see getBuffer
	1771	* @stable ICU 2.0
	1772	*/
	1773	inline int32_t getCapacity(void) const;
	1774
	1775	/* Other operations */
	1776
	1777	/**
	1778	* Generate a hash code for this object.
	1779	* @return The hash code of this UnicodeString.
	1780	* @stable ICU 2.0
	1781	*/
	1782	inline int32_t hashCode(void) const;
	1783
	1784	/**
	1785	* Determine if this object contains a valid string.
	1786	* A bogus string has no value. It is different from an empty string,
	1787	* although in both cases isEmpty() returns TRUE and length() returns 0.
	1788	* setToBogus() and isBogus() can be used to indicate that no string value is available.
	1789	* For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
	1790	* length() returns 0.
	1791	*
	1792	* @return TRUE if the string is bogus (has no valid value), FALSE otherwise
	1793	* @see setToBogus()
	1794	* @stable ICU 2.0
	1795	*/
	1796	inline UBool isBogus(void) const;
	1797
	1798
	1799	//========================================
	1800	// Write operations
	1801	//========================================
	1802
	1803	/* Assignment operations */
	1804
	1805	/**
	1806	* Assignment operator. Replace the characters in this UnicodeString
	1807	* with the characters from <TT>srcText</TT>.
	1808	* @param srcText The text containing the characters to replace
	1809	* @return a reference to this
	1810	* @stable ICU 2.0
	1811	*/
	1812	UnicodeString &operator=(const UnicodeString &srcText);
	1813
	1814	/**
	1815	* Almost the same as the assignment operator.
	1816	* Replace the characters in this UnicodeString
	1817	* with the characters from <code>srcText</code>.
	1818	*
	1819	* This function works the same for all strings except for ones that
	1820	* are readonly aliases.
	1821	* Starting with ICU 2.4, the assignment operator and the copy constructor
	1822	* allocate a new buffer and copy the buffer contents even for readonly aliases.
	1823	* This function implements the old, more efficient but less safe behavior
	1824	* of making this string also a readonly alias to the same buffer.
	1825	* The fastCopyFrom function must be used only if it is known that the lifetime of
	1826	* this UnicodeString is at least as long as the lifetime of the aliased buffer
	1827	* including its contents, for example for strings from resource bundles
	1828	* or aliases to string contents.
	1829	*
	1830	* @param src The text containing the characters to replace.
	1831	* @return a reference to this
	1832	* @stable ICU 2.4
	1833	*/
	1834	UnicodeString &fastCopyFrom(const UnicodeString &src);
	1835
	1836	/**
	1837	* Assignment operator. Replace the characters in this UnicodeString
	1838	* with the code unit <TT>ch</TT>.
	1839	* @param ch the code unit to replace
	1840	* @return a reference to this
	1841	* @stable ICU 2.0
	1842	*/
	1843	inline UnicodeString& operator= (UChar ch);
	1844
	1845	/**
	1846	* Assignment operator. Replace the characters in this UnicodeString
	1847	* with the code point <TT>ch</TT>.
	1848	* @param ch the code point to replace
	1849	* @return a reference to this
	1850	* @stable ICU 2.0
	1851	*/
	1852	inline UnicodeString& operator= (UChar32 ch);
	1853
	1854	/**
	1855	* Set the text in the UnicodeString object to the characters
	1856	* in <TT>srcText</TT> in the range
	1857	* [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
	1858	* <TT>srcText</TT> is not modified.
	1859	* @param srcText the source for the new characters
	1860	* @param srcStart the offset into <TT>srcText</TT> where new characters
	1861	* will be obtained
	1862	* @return a reference to this
	1863	* @stable ICU 2.2
	1864	*/
	1865	inline UnicodeString& setTo(const UnicodeString& srcText,
	1866	int32_t srcStart);
	1867
	1868	/**
	1869	* Set the text in the UnicodeString object to the characters
	1870	* in <TT>srcText</TT> in the range
	1871	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	1872	* <TT>srcText</TT> is not modified.
	1873	* @param srcText the source for the new characters
	1874	* @param srcStart the offset into <TT>srcText</TT> where new characters
	1875	* will be obtained
	1876	* @param srcLength the number of characters in <TT>srcText</TT> in the
	1877	* replace string.
	1878	* @return a reference to this
	1879	* @stable ICU 2.0
	1880	*/
	1881	inline UnicodeString& setTo(const UnicodeString& srcText,
	1882	int32_t srcStart,
	1883	int32_t srcLength);
	1884
	1885	/**
	1886	* Set the text in the UnicodeString object to the characters in
	1887	* <TT>srcText</TT>.
	1888	* <TT>srcText</TT> is not modified.
	1889	* @param srcText the source for the new characters
	1890	* @return a reference to this
	1891	* @stable ICU 2.0
	1892	*/
	1893	inline UnicodeString& setTo(const UnicodeString& srcText);
	1894
	1895	/**
	1896	* Set the characters in the UnicodeString object to the characters
	1897	* in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
	1898	* @param srcChars the source for the new characters
	1899	* @param srcLength the number of Unicode characters in srcChars.
	1900	* @return a reference to this
	1901	* @stable ICU 2.0
	1902	*/
	1903	inline UnicodeString& setTo(const UChar *srcChars,
	1904	int32_t srcLength);
	1905
	1906	/**
	1907	* Set the characters in the UnicodeString object to the code unit
	1908	* <TT>srcChar</TT>.
	1909	* @param srcChar the code unit which becomes the UnicodeString's character
	1910	* content
	1911	* @return a reference to this
	1912	* @stable ICU 2.0
	1913	*/
	1914	UnicodeString& setTo(UChar srcChar);
	1915
	1916	/**
	1917	* Set the characters in the UnicodeString object to the code point
	1918	* <TT>srcChar</TT>.
	1919	* @param srcChar the code point which becomes the UnicodeString's character
	1920	* content
	1921	* @return a reference to this
	1922	* @stable ICU 2.0
	1923	*/
	1924	UnicodeString& setTo(UChar32 srcChar);
	1925
	1926	/**
	1927	* Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
	1928	* The text will be used for the UnicodeString object, but
	1929	* it will not be released when the UnicodeString is destroyed.
	1930	* This has copy-on-write semantics:
	1931	* When the string is modified, then the buffer is first copied into
	1932	* newly allocated memory.
	1933	* The aliased buffer is never modified.
	1934	* In an assignment to another UnicodeString via fastCopyFrom(), the text will be aliased again,
	1935	* so that both strings then alias the same readonly-text; operator=() copies the text instead.
	1936	*
	1937	* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
	1938	* This must be true if <code>textLength==-1</code>.
	1939	* @param text The characters to alias for the UnicodeString.
	1940	* @param textLength The number of Unicode characters in <code>text</code> to alias.
	1941	* If -1, then this function will determine the length
	1942	* by calling <code>u_strlen()</code>.
	1943	* @return a reference to this
	1944	* @stable ICU 2.0
	1945	*/
	1946	UnicodeString &setTo(UBool isTerminated,
	1947	const UChar *text,
	1948	int32_t textLength);
	1949
	1950	/**
	1951	* Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
	1952	* The text will be used for the UnicodeString object, but
	1953	* it will not be released when the UnicodeString is destroyed.
	1954	* This has write-through semantics:
	1955	* For as long as the capacity of the buffer is sufficient, write operations
	1956	* will directly affect the buffer. When more capacity is necessary, then
	1957	* a new buffer will be allocated and the contents copied as with regularly
	1958	* constructed strings.
	1959	* In an assignment to another UnicodeString, the buffer will be copied.
	1960	* The extract(UChar *dst) function detects whether the dst pointer is the same
	1961	* as the string buffer itself and will in this case not copy the contents.
	1962	*
	1963	* @param buffer The characters to alias for the UnicodeString.
	1964	* @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
	1965	* @param buffCapacity The size of <code>buffer</code> in UChars.
	1966	* @return a reference to this
	1967	* @stable ICU 2.0
	1968	*/
	1969	UnicodeString &setTo(UChar *buffer,
	1970	int32_t buffLength,
	1971	int32_t buffCapacity);
	1972
	1973	/**
	1974	* Make this UnicodeString object invalid.
	1975	* The string will test TRUE with isBogus().
	1976	*
	1977	* A bogus string has no value. It is different from an empty string.
	1978	* It can be used to indicate that no string value is available.
	1979	* getBuffer() and getTerminatedBuffer() return NULL, and
	1980	* length() returns 0.
	1981	*
	1982	* This utility function is used throughout the UnicodeString
	1983	* implementation to indicate that a UnicodeString operation failed,
	1984	* and may be used in other functions,
	1985	* especially but not exclusively when such functions do not
	1986	* take a UErrorCode for simplicity.
	1987	*
	1988	* The following methods, and no others, will clear a string object's bogus flag:
	1989	* - remove()
	1990	* - remove(0, INT32_MAX)
	1991	* - truncate(0)
	1992	* - operator=() (assignment operator)
	1993	* - setTo(...)
	1994	*
	1995	* The simplest way to turn a bogus string into an empty one
	1996	* is to use the remove() function.
	1997	* Examples for other functions that are equivalent to "set to empty string":
	1998	* \code
	1999	* if(s.isBogus()) {
	2000	* s.remove(); // set to an empty string (remove all), or
	2001	* s.remove(0, INT32_MAX); // set to an empty string (remove all), or
	2002	* s.truncate(0); // set to an empty string (complete truncation), or
	2003	* s=UnicodeString(); // assign an empty string, or
	2004	* s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
	2005	* static const UChar nul=0;
	2006	* s.setTo(&nul, 0); // set to an empty C Unicode string
	2007	* }
	2008	* \endcode
	2009	*
	2010	* @see isBogus()
	2011	* @stable ICU 2.0
	2012	*/
	2013	void setToBogus();
	2014
	2015	/**
	2016	* Set the character at the specified offset to the specified character.
	2017	* @param offset A valid offset into the text of the character to set
	2018	* @param ch The new character
	2019	* @return A reference to this
	2020	* @stable ICU 2.0
	2021	*/
	2022	UnicodeString& setCharAt(int32_t offset,
	2023	UChar ch);
	2024
	2025
	2026	/* Append operations */
	2027
	2028	/**
	2029	* Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
	2030	* object.
	2031	* @param ch the code unit to be appended
	2032	* @return a reference to this
	2033	* @stable ICU 2.0
	2034	*/
	2035	inline UnicodeString& operator+= (UChar ch);
	2036
	2037	/**
	2038	* Append operator. Append the code point <TT>ch</TT> to the UnicodeString
	2039	* object.
	2040	* @param ch the code point to be appended
	2041	* @return a reference to this
	2042	* @stable ICU 2.0
	2043	*/
	2044	inline UnicodeString& operator+= (UChar32 ch);
	2045
	2046	/**
	2047	* Append operator. Append the characters in <TT>srcText</TT> to the
	2048	* UnicodeString object. <TT>srcText</TT> is not modified.
	2049	* @param srcText the source for the new characters
	2050	* @return a reference to this
	2051	* @stable ICU 2.0
	2052	*/
	2053	inline UnicodeString& operator+= (const UnicodeString& srcText);
	2054
	2055	/**
	2056	* Append the characters
	2057	* in <TT>srcText</TT> in the range
	2058	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
	2059	* UnicodeString object. <TT>srcText</TT>
	2060	* is not modified.
	2061	* @param srcText the source for the new characters
	2062	* @param srcStart the offset into <TT>srcText</TT> where new characters
	2063	* will be obtained
	2064	* @param srcLength the number of characters in <TT>srcText</TT> in
	2065	* the append string
	2066	* @return a reference to this
	2067	* @stable ICU 2.0
	2068	*/
	2069	inline UnicodeString& append(const UnicodeString& srcText,
	2070	int32_t srcStart,
	2071	int32_t srcLength);
	2072
	2073	/**
	2074	* Append the characters in <TT>srcText</TT> to the UnicodeString object.
	2075	* <TT>srcText</TT> is not modified.
	2076	* @param srcText the source for the new characters
	2077	* @return a reference to this
	2078	* @stable ICU 2.0
	2079	*/
	2080	inline UnicodeString& append(const UnicodeString& srcText);
	2081
	2082	/**
	2083	* Append the characters in <TT>srcChars</TT> in the range
	2084	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
	2085	* object.
	2086	* <TT>srcChars</TT> is not modified.
	2087	* @param srcChars the source for the new characters
	2088	* @param srcStart the offset into <TT>srcChars</TT> where new characters
	2089	* will be obtained
	2090	* @param srcLength the number of characters in <TT>srcChars</TT> in
	2091	* the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
	2092	* @return a reference to this
	2093	* @stable ICU 2.0
	2094	*/
	2095	inline UnicodeString& append(const UChar *srcChars,
	2096	int32_t srcStart,
	2097	int32_t srcLength);
	2098
	2099	/**
	2100	* Append the characters in <TT>srcChars</TT> to the UnicodeString object.
	2101	* <TT>srcChars</TT> is not modified.
	2102	* @param srcChars the source for the new characters
	2103	* @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
	2104	* can be -1 if <TT>srcChars</TT> is NUL-terminated
	2105	* @return a reference to this
	2106	* @stable ICU 2.0
	2107	*/
	2108	inline UnicodeString& append(const UChar *srcChars,
	2109	int32_t srcLength);
	2110
	2111	/**
	2112	* Append the code unit <TT>srcChar</TT> to the UnicodeString object.
	2113	* @param srcChar the code unit to append
	2114	* @return a reference to this
	2115	* @stable ICU 2.0
	2116	*/
	2117	inline UnicodeString& append(UChar srcChar);
	2118
	2119	/**
	2120	* Append the code point <TT>srcChar</TT> to the UnicodeString object.
	2121	* @param srcChar the code point to append
	2122	* @return a reference to this
	2123	* @stable ICU 2.0
	2124	*/
	2125	UnicodeString& append(UChar32 srcChar);
	2126
	2127
	2128	/* Insert operations */
	2129
	2130	/**
	2131	* Insert the characters in <TT>srcText</TT> in the range
	2132	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
	2133	* object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
	2134	* @param start the offset where the insertion begins
	2135	* @param srcText the source for the new characters
	2136	* @param srcStart the offset into <TT>srcText</TT> where new characters
	2137	* will be obtained
	2138	* @param srcLength the number of characters in <TT>srcText</TT> in
	2139	* the insert string
	2140	* @return a reference to this
	2141	* @stable ICU 2.0
	2142	*/
	2143	inline UnicodeString& insert(int32_t start,
	2144	const UnicodeString& srcText,
	2145	int32_t srcStart,
	2146	int32_t srcLength);
	2147
	2148	/**
	2149	* Insert the characters in <TT>srcText</TT> into the UnicodeString object
	2150	* at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
	2151	* @param start the offset where the insertion begins
	2152	* @param srcText the source for the new characters
	2153	* @return a reference to this
	2154	* @stable ICU 2.0
	2155	*/
	2156	inline UnicodeString& insert(int32_t start,
	2157	const UnicodeString& srcText);
	2158
	2159	/**
	2160	* Insert the characters in <TT>srcChars</TT> in the range
	2161	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
	2162	* object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
	2163	* @param start the offset at which the insertion begins
	2164	* @param srcChars the source for the new characters
	2165	* @param srcStart the offset into <TT>srcChars</TT> where new characters
	2166	* will be obtained
	2167	* @param srcLength the number of characters in <TT>srcChars</TT>
	2168	* in the insert string
	2169	* @return a reference to this
	2170	* @stable ICU 2.0
	2171	*/
	2172	inline UnicodeString& insert(int32_t start,
	2173	const UChar *srcChars,
	2174	int32_t srcStart,
	2175	int32_t srcLength);
	2176
	2177	/**
	2178	* Insert the characters in <TT>srcChars</TT> into the UnicodeString object
	2179	* at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
	2180	* @param start the offset where the insertion begins
	2181	* @param srcChars the source for the new characters
	2182	* @param srcLength the number of Unicode characters in srcChars.
	2183	* @return a reference to this
	2184	* @stable ICU 2.0
	2185	*/
	2186	inline UnicodeString& insert(int32_t start,
	2187	const UChar *srcChars,
	2188	int32_t srcLength);
	2189
	2190	/**
	2191	* Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
	2192	* offset <TT>start</TT>.
	2193	* @param start the offset at which the insertion occurs
	2194	* @param srcChar the code unit to insert
	2195	* @return a reference to this
	2196	* @stable ICU 2.0
	2197	*/
	2198	inline UnicodeString& insert(int32_t start,
	2199	UChar srcChar);
	2200
	2201	/**
	2202	* Insert the code point <TT>srcChar</TT> into the UnicodeString object at
	2203	* offset <TT>start</TT>.
	2204	* @param start the offset at which the insertion occurs
	2205	* @param srcChar the code point to insert
	2206	* @return a reference to this
	2207	* @stable ICU 2.0
	2208	*/
	2209	inline UnicodeString& insert(int32_t start,
	2210	UChar32 srcChar);
	2211
	2212
	2213	/* Replace operations */
	2214
	2215	/**
	2216	* Replace the characters in the range
	2217	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
	2218	* <TT>srcText</TT> in the range
	2219	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
	2220	* <TT>srcText</TT> is not modified.
	2221	* @param start the offset at which the replace operation begins
	2222	* @param length the number of characters to replace. The character at
	2223	* <TT>start + length</TT> is not modified.
	2224	* @param srcText the source for the new characters
	2225	* @param srcStart the offset into <TT>srcText</TT> where new characters
	2226	* will be obtained
	2227	* @param srcLength the number of characters in <TT>srcText</TT> in
	2228	* the replace string
	2229	* @return a reference to this
	2230	* @stable ICU 2.0
	2231	*/
	2232	UnicodeString& replace(int32_t start,
	2233	int32_t length,
	2234	const UnicodeString& srcText,
	2235	int32_t srcStart,
	2236	int32_t srcLength);
	2237
	2238	/**
	2239	* Replace the characters in the range
	2240	* [<TT>start</TT>, <TT>start + length</TT>)
	2241	* with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
	2242	* not modified.
	2243	* @param start the offset at which the replace operation begins
	2244	* @param length the number of characters to replace. The character at
	2245	* <TT>start + length</TT> is not modified.
	2246	* @param srcText the source for the new characters
	2247	* @return a reference to this
	2248	* @stable ICU 2.0
	2249	*/
	2250	UnicodeString& replace(int32_t start,
	2251	int32_t length,
	2252	const UnicodeString& srcText);
	2253
	2254	/**
	2255	* Replace the characters in the range
	2256	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
	2257	* <TT>srcChars</TT> in the range
	2258	* [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
	2259	* is not modified.
	2260	* @param start the offset at which the replace operation begins
	2261	* @param length the number of characters to replace. The character at
	2262	* <TT>start + length</TT> is not modified.
	2263	* @param srcChars the source for the new characters
	2264	* @param srcStart the offset into <TT>srcChars</TT> where new characters
	2265	* will be obtained
	2266	* @param srcLength the number of characters in <TT>srcChars</TT>
	2267	* in the replace string
	2268	* @return a reference to this
	2269	* @stable ICU 2.0
	2270	*/
	2271	UnicodeString& replace(int32_t start,
	2272	int32_t length,
	2273	const UChar *srcChars,
	2274	int32_t srcStart,
	2275	int32_t srcLength);
	2276
	2277	/**
	2278	* Replace the characters in the range
	2279	* [<TT>start</TT>, <TT>start + length</TT>) with the characters in
	2280	* <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
	2281	* @param start the offset at which the replace operation begins
	2282	* @param length number of characters to replace. The character at
	2283	* <TT>start + length</TT> is not modified.
	2284	* @param srcChars the source for the new characters
	2285	* @param srcLength the number of Unicode characters in srcChars
	2286	* @return a reference to this
	2287	* @stable ICU 2.0
	2288	*/
	2289	inline UnicodeString& replace(int32_t start,
	2290	int32_t length,
	2291	const UChar *srcChars,
	2292	int32_t srcLength);
	2293
	2294	/**
	2295	* Replace the characters in the range
	2296	* [<TT>start</TT>, <TT>start + length</TT>) with the code unit
	2297	* <TT>srcChar</TT>.
	2298	* @param start the offset at which the replace operation begins
	2299	* @param length the number of characters to replace. The character at
	2300	* <TT>start + length</TT> is not modified.
	2301	* @param srcChar the new code unit
	2302	* @return a reference to this
	2303	* @stable ICU 2.0
	2304	*/
	2305	inline UnicodeString& replace(int32_t start,
	2306	int32_t length,
	2307	UChar srcChar);
	2308
	2309	/**
	2310	* Replace the characters in the range
	2311	* [<TT>start</TT>, <TT>start + length</TT>) with the code point
	2312	* <TT>srcChar</TT>.
	2313	* @param start the offset at which the replace operation begins
	2314	* @param length the number of characters to replace. The character at
	2315	* <TT>start + length</TT> is not modified.
	2316	* @param srcChar the new code point
	2317	* @return a reference to this
	2318	* @stable ICU 2.0
	2319	*/
	2320	UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
	2321
	2322	/**
	2323	* Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
	2324	* with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
	2325	* @param start the offset at which the replace operation begins
	2326	* @param limit the offset immediately following the replace range
	2327	* @param srcText the source for the new characters
	2328	* @return a reference to this
	2329	* @stable ICU 2.0
	2330	*/
	2331	inline UnicodeString& replaceBetween(int32_t start,
	2332	int32_t limit,
	2333	const UnicodeString& srcText);
	2334
	2335	/**
	2336	* Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
	2337	* with the characters in <TT>srcText</TT> in the range
	2338	* [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
	2339	* @param start the offset at which the replace operation begins
	2340	* @param limit the offset immediately following the replace range
	2341	* @param srcText the source for the new characters
	2342	* @param srcStart the offset into <TT>srcText</TT> where new characters
	2343	* will be obtained
	2344	* @param srcLimit the offset immediately following the range to copy
	2345	* in <TT>srcText</TT>
	2346	* @return a reference to this
	2347	* @stable ICU 2.0
	2348	*/
	2349	inline UnicodeString& replaceBetween(int32_t start,
	2350	int32_t limit,
	2351	const UnicodeString& srcText,
	2352	int32_t srcStart,
	2353	int32_t srcLimit);
	2354
	2355	/**
	2356	* Replace a substring of this object with the given text.
	2357	* @param start the beginning index, inclusive; <code>0 <= start
	2358	* <= limit</code>.
	2359	* @param limit the ending index, exclusive; <code>start <= limit
	2360	* <= length()</code>.
	2361	* @param text the text to replace characters <code>start</code>
	2362	* to <code>limit - 1</code>
	2363	* @stable ICU 2.0
	2364	*/
	2365	virtual void handleReplaceBetween(int32_t start,
	2366	int32_t limit,
	2367	const UnicodeString& text);
	2368
	2369	/**
	2370	* Replaceable API
	2371	* @return TRUE if it has MetaData
	2372	* @stable ICU 2.4
	2373	*/
	2374	virtual UBool hasMetaData() const;
	2375
	2376	/**
	2377	* Copy a substring of this object, retaining attribute (out-of-band)
	2378	* information. This method is used to duplicate or reorder substrings.
	2379	* The destination index must not overlap the source range.
	2380	*
	2381	* @param start the beginning index, inclusive; <code>0 <= start <=
	2382	* limit</code>.
	2383	* @param limit the ending index, exclusive; <code>start <= limit <=
	2384	* length()</code>.
	2385	* @param dest the destination index. The characters from
	2386	* <code>start..limit-1</code> will be copied to <code>dest</code>.
	2387	* Implementations of this method may assume that <code>dest <= start ||
	2388	* dest >= limit</code>.
	2389	* @stable ICU 2.0
	2390	*/
	2391	virtual void copy(int32_t start, int32_t limit, int32_t dest);
	2392
	2393	/* Search and replace operations */
	2394
	2395	/**
	2396	* Replace all occurrences of characters in oldText with the characters
	2397	* in newText
	2398	* @param oldText the text containing the search text
	2399	* @param newText the text containing the replacement text
	2400	* @return a reference to this
	2401	* @stable ICU 2.0
	2402	*/
	2403	inline UnicodeString& findAndReplace(const UnicodeString& oldText,
	2404	const UnicodeString& newText);
	2405
	2406	/**
	2407	* Replace all occurrences of characters in oldText with characters
	2408	* in newText
	2409	* in the range [<TT>start</TT>, <TT>start + length</TT>).
	2410	* @param start the start of the range in which replace will be performed
	2411	* @param length the length of the range in which replace will be performed
	2412	* @param oldText the text containing the search text
	2413	* @param newText the text containing the replacement text
	2414	* @return a reference to this
	2415	* @stable ICU 2.0
	2416	*/
	2417	inline UnicodeString& findAndReplace(int32_t start,
	2418	int32_t length,
	2419	const UnicodeString& oldText,
	2420	const UnicodeString& newText);
	2421
	2422	/**
	2423	* Replace all occurrences of characters in oldText in the range
	2424	* [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
	2425	* in newText in the range
	2426	* [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
	2427	* in the range [<TT>start</TT>, <TT>start + length</TT>).
	2428	* @param start the start of the range in which replace will be performed
	2429	* @param length the length of the range in which replace will be performed
	2430	* @param oldText the text containing the search text
	2431	* @param oldStart the start of the search range in <TT>oldText</TT>
	2432	* @param oldLength the length of the search range in <TT>oldText</TT>
	2433	* @param newText the text containing the replacement text
	2434	* @param newStart the start of the replacement range in <TT>newText</TT>
	2435	* @param newLength the length of the replacement range in <TT>newText</TT>
	2436	* @return a reference to this
	2437	* @stable ICU 2.0
	2438	*/
	2439	UnicodeString& findAndReplace(int32_t start,
	2440	int32_t length,
	2441	const UnicodeString& oldText,
	2442	int32_t oldStart,
	2443	int32_t oldLength,
	2444	const UnicodeString& newText,
	2445	int32_t newStart,
	2446	int32_t newLength);
	2447
	2448
	2449	/* Remove operations */
	2450
	2451	/**
	2452	* Remove all characters from the UnicodeString object.
	2453	* @return a reference to this
	2454	* @stable ICU 2.0
	2455	*/
	2456	inline UnicodeString& remove(void);
	2457
	2458	/**
	2459	* Remove the characters in the range
	2460	* [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
	2461	* @param start the offset of the first character to remove
	2462	* @param length the number of characters to remove
	2463	* @return a reference to this
	2464	* @stable ICU 2.0
	2465	*/
	2466	inline UnicodeString& remove(int32_t start,
	2467	int32_t length = (int32_t)INT32_MAX);
	2468
	2469	/**
	2470	* Remove the characters in the range
	2471	* [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
	2472	* @param start the offset of the first character to remove
	2473	* @param limit the offset immediately following the range to remove
	2474	* @return a reference to this
	2475	* @stable ICU 2.0
	2476	*/
	2477	inline UnicodeString& removeBetween(int32_t start,
	2478	int32_t limit = (int32_t)INT32_MAX);
	2479
	2480	/**
	2481	* Retain only the characters in the range
	2482	* [<code>start</code>, <code>limit</code>) from the UnicodeString object.
	2483	* Removes characters before <code>start</code> and at and after <code>limit</code>.
	2484	* @param start the offset of the first character to retain
	2485	* @param limit the offset immediately following the range to retain
	2486	* @return a reference to this
	2487	* @stable ICU 4.4
	2488	*/
	2489	inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
	2490
	2491	/* Length operations */
	2492
	2493	/**
	2494	* Pad the start of this UnicodeString with the character <TT>padChar</TT>.
	2495	* If the length of this UnicodeString is less than targetLength,
	2496	* targetLength - length() copies of padChar will be added to the
	2497	* beginning of this UnicodeString.
	2498	* @param targetLength the desired length of the string
	2499	* @param padChar the character to use for padding. Defaults to
	2500	* space (U+0020)
	2501	* @return TRUE if the text was padded, FALSE otherwise.
	2502	* @stable ICU 2.0
	2503	*/
	2504	UBool padLeading(int32_t targetLength,
	2505	UChar padChar = 0x0020);
	2506
	2507	/**
	2508	* Pad the end of this UnicodeString with the character <TT>padChar</TT>.
	2509	* If the length of this UnicodeString is less than targetLength,
	2510	* targetLength - length() copies of padChar will be added to the
	2511	* end of this UnicodeString.
	2512	* @param targetLength the desired length of the string
	2513	* @param padChar the character to use for padding. Defaults to
	2514	* space (U+0020)
	2515	* @return TRUE if the text was padded, FALSE otherwise.
	2516	* @stable ICU 2.0
	2517	*/
	2518	UBool padTrailing(int32_t targetLength,
	2519	UChar padChar = 0x0020);
	2520
	2521	/**
	2522	* Truncate this UnicodeString to the <TT>targetLength</TT>.
	2523	* @param targetLength the desired length of this UnicodeString.
	2524	* @return TRUE if the text was truncated, FALSE otherwise
	2525	* @stable ICU 2.0
	2526	*/
	2527	inline UBool truncate(int32_t targetLength);
	2528
	2529	/**
	2530	* Trims leading and trailing whitespace from this UnicodeString.
	2531	* @return a reference to this
	2532	* @stable ICU 2.0
	2533	*/
	2534	UnicodeString& trim(void);
	2535
	2536
	2537	/* Miscellaneous operations */
	2538
	2539	/**
	2540	* Reverse this UnicodeString in place.
	2541	* @return a reference to this
	2542	* @stable ICU 2.0
	2543	*/
	2544	inline UnicodeString& reverse(void);
	2545
	2546	/**
	2547	* Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
	2548	* this UnicodeString.
	2549	* @param start the start of the range to reverse
	2550	* @param length the number of characters to reverse
	2551	* @return a reference to this
	2552	* @stable ICU 2.0
	2553	*/
	2554	inline UnicodeString& reverse(int32_t start,
	2555	int32_t length);
	2556
	2557	/**
	2558	* Convert the characters in this to UPPER CASE following the conventions of
	2559	* the default locale.
	2560	* @return A reference to this.
	2561	* @stable ICU 2.0
	2562	*/
	2563	UnicodeString& toUpper(void);
	2564
	2565	/**
	2566	* Convert the characters in this to UPPER CASE following the conventions of
	2567	* a specific locale.
	2568	* @param locale The locale containing the conventions to use.
	2569	* @return A reference to this.
	2570	* @stable ICU 2.0
	2571	*/
	2572	UnicodeString& toUpper(const Locale& locale);
	2573
	2574	/**
	2575	* Convert the characters in this to lower case following the conventions of
	2576	* the default locale.
	2577	* @return A reference to this.
	2578	* @stable ICU 2.0
	2579	*/
	2580	UnicodeString& toLower(void);
	2581
	2582	/**
	2583	* Convert the characters in this to lower case following the conventions of
	2584	* a specific locale.
	2585	* @param locale The locale containing the conventions to use.
	2586	* @return A reference to this.
	2587	* @stable ICU 2.0
	2588	*/
	2589	UnicodeString& toLower(const Locale& locale);
	2590
	2591	#if !UCONFIG_NO_BREAK_ITERATION
	2592
	2593	/**
	2594	* Titlecase this string, convenience function using the default locale.
	2595	*
	2596	* Casing is locale-dependent and context-sensitive.
	2597	* Titlecasing uses a break iterator to find the first characters of words
	2598	* that are to be titlecased. It titlecases those characters and lowercases
	2599	* all others.
	2600	*
	2601	* The titlecase break iterator can be provided to customize for arbitrary
	2602	* styles, using rules and dictionaries beyond the standard iterators.
	2603	* It may be more efficient to always provide an iterator to avoid
	2604	* opening and closing one for each string.
	2605	* The standard titlecase iterator for the root locale implements the
	2606	* algorithm of Unicode TR 21.
	2607	*
	2608	* This function uses only the setText(), first() and next() methods of the
	2609	* provided break iterator.
	2610	*
	2611	* @param titleIter A break iterator to find the first characters of words
	2612	* that are to be titlecased.
	2613	* If none is provided (0), then a standard titlecase
	2614	* break iterator is opened.
	2615	* Otherwise the provided iterator is set to the string's text.
	2616	* @return A reference to this.
	2617	* @stable ICU 2.1
	2618	*/
	2619	UnicodeString &toTitle(BreakIterator *titleIter);
	2620
	2621	/**
	2622	* Titlecase this string.
	2623	*
	2624	* Casing is locale-dependent and context-sensitive.
	2625	* Titlecasing uses a break iterator to find the first characters of words
	2626	* that are to be titlecased. It titlecases those characters and lowercases
	2627	* all others.
	2628	*
	2629	* The titlecase break iterator can be provided to customize for arbitrary
	2630	* styles, using rules and dictionaries beyond the standard iterators.
	2631	* It may be more efficient to always provide an iterator to avoid
	2632	* opening and closing one for each string.
	2633	* The standard titlecase iterator for the root locale implements the
	2634	* algorithm of Unicode TR 21.
	2635	*
	2636	* This function uses only the setText(), first() and next() methods of the
	2637	* provided break iterator.
	2638	*
	2639	* @param titleIter A break iterator to find the first characters of words
	2640	* that are to be titlecased.
	2641	* If none is provided (0), then a standard titlecase
	2642	* break iterator is opened.
	2643	* Otherwise the provided iterator is set to the string's text.
	2644	* @param locale The locale to consider.
	2645	* @return A reference to this.
	2646	* @stable ICU 2.1
	2647	*/
	2648	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
	2649
	2650	/**
	2651	* Titlecase this string, with options.
	2652	*
	2653	* Casing is locale-dependent and context-sensitive.
	2654	* Titlecasing uses a break iterator to find the first characters of words
	2655	* that are to be titlecased. It titlecases those characters and lowercases
	2656	* all others. (This can be modified with options.)
	2657	*
	2658	* The titlecase break iterator can be provided to customize for arbitrary
	2659	* styles, using rules and dictionaries beyond the standard iterators.
	2660	* It may be more efficient to always provide an iterator to avoid
	2661	* opening and closing one for each string.
	2662	* The standard titlecase iterator for the root locale implements the
	2663	* algorithm of Unicode TR 21.
	2664	*
	2665	* This function uses only the setText(), first() and next() methods of the
	2666	* provided break iterator.
	2667	*
	2668	* @param titleIter A break iterator to find the first characters of words
	2669	* that are to be titlecased.
	2670	* If none is provided (0), then a standard titlecase
	2671	* break iterator is opened.
	2672	* Otherwise the provided iterator is set to the string's text.
	2673	* @param locale The locale to consider.
	2674	* @param options Options bit set, see ucasemap_open().
	2675	* @return A reference to this.
	2676	* @see U_TITLECASE_NO_LOWERCASE
	2677	* @see U_TITLECASE_NO_BREAK_ADJUSTMENT
	2678	* @see ucasemap_open
	2679	* @stable ICU 3.8
	2680	*/
	2681	UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
	2682
	2683	#endif
	2684
	2685	/**
	2686	* Case-fold the characters in this string.
	2687	* Case-folding is locale-independent and not context-sensitive,
	2688	* but there is an option for whether to include or exclude mappings for dotted I
	2689	* and dotless i that are marked with 'I' in CaseFolding.txt.
	2690	* The result may be longer or shorter than the original.
	2691	*
	2692	* @param options Either U_FOLD_CASE_DEFAULT (the default, 0) or U_FOLD_CASE_EXCLUDE_SPECIAL_I
	2693	* @return A reference to this.
	2694	* @stable ICU 2.0
	2695	*/
	2696	UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
	2697
	2698	//========================================
	2699	// Access to the internal buffer
	2700	//========================================
	2701
	2702	/**
	2703	* Get a read/write pointer to the internal buffer.
	2704	* The buffer is guaranteed to be large enough for at least minCapacity UChars,
	2705	* writable, and is still owned by the UnicodeString object.
	2706	* Calls to getBuffer(minCapacity) must not be nested, and
	2707	* must be matched with calls to releaseBuffer(newLength).
	2708	* If the string buffer was read-only or shared,
	2709	* then it will be reallocated and copied.
	2710	*
	2711	* An attempted nested call will return 0, and will not further modify the
	2712	* state of the UnicodeString object.
	2713	* It also returns 0 if the string is bogus.
	2714	*
	2715	* The actual capacity of the string buffer may be larger than minCapacity.
	2716	* getCapacity() returns the actual capacity.
	2717	* For many operations, the full capacity should be used to avoid reallocations.
	2718	*
	2719	* While the buffer is "open" between getBuffer(minCapacity)
	2720	* and releaseBuffer(newLength), the following applies:
	2721	* - The string length is set to 0.
	2722	* - Any read API call on the UnicodeString object will behave like on a 0-length string.
	2723	* - Any write API call on the UnicodeString object is disallowed and will have no effect.
	2724	* - You can read from and write to the returned buffer.
	2725	* - The previous string contents will still be in the buffer;
	2726	* if you want to use it, then you need to call length() before getBuffer(minCapacity).
	2727	* If the length() was greater than minCapacity, then any contents after minCapacity
	2728	* may be lost.
	2729	* The buffer contents is not NUL-terminated by getBuffer().
	2730	* If length()<getCapacity() then you can terminate it by writing a NUL
	2731	* at index length().
	2732	* - You must call releaseBuffer(newLength) in order to
	2733	* return to normal UnicodeString operation.
	2734	*
	2735	* @param minCapacity the minimum number of UChars that are to be available
	2736	* in the buffer, starting at the returned pointer;
	2737	* default to the current string capacity if minCapacity==-1
	2738	* @return a writable pointer to the internal string buffer,
	2739	* or 0 if an error occurs (nested calls, out of memory)
	2740	*
	2741	* @see releaseBuffer
	2742	* @see getTerminatedBuffer()
	2743	* @stable ICU 2.0
	2744	*/
	2745	UChar *getBuffer(int32_t minCapacity);
	2746
	2747	/**
	2748	* Release a read/write buffer on a UnicodeString object with an
	2749	* "open" getBuffer(minCapacity).
	2750	* This function must be called in a matched pair with getBuffer(minCapacity).
	2751	* releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
	2752	*
	2753	* It will set the string length to newLength, at most to the current capacity.
	2754	* If newLength==-1 then it will set the length according to the
	2755	* first NUL in the buffer, or to the capacity if there is no NUL.
	2756	*
	2757	* After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
	2758	*
	2759	* @param newLength the new length of the UnicodeString object;
	2760	* defaults to the current capacity if newLength is greater than that;
	2761	* if newLength==-1, it defaults to u_strlen(buffer) but not more than
	2762	* the current capacity of the string
	2763	*
	2764	* @see getBuffer(int32_t minCapacity)
	2765	* @stable ICU 2.0
	2766	*/
	2767	void releaseBuffer(int32_t newLength=-1);
	2768
	2769	/**
	2770	* Get a read-only pointer to the internal buffer.
	2771	* This can be called at any time on a valid UnicodeString.
	2772	*
	2773	* It returns 0 if the string is bogus, or
	2774	* during an "open" getBuffer(minCapacity).
	2775	*
	2776	* It can be called as many times as desired.
	2777	* The pointer that it returns will remain valid until the UnicodeString object is modified,
	2778	* at which time the pointer is semantically invalidated and must not be used any more.
	2779	*
	2780	* The capacity of the buffer can be determined with getCapacity().
	2781	* The part after length() may or may not be initialized and valid,
	2782	* depending on the history of the UnicodeString object.
	2783	*
	2784	* The buffer contents is (probably) not NUL-terminated.
	2785	* You can check if it is with
	2786	* <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
	2787	* (See getTerminatedBuffer().)
	2788	*
	2789	* The buffer may reside in read-only memory. Its contents must not
	2790	* be modified.
	2791	*
	2792	* @return a read-only pointer to the internal string buffer,
	2793	* or 0 if the string is bogus or a getBuffer(minCapacity) is "open"
	2794	*
	2795	* @see getBuffer(int32_t minCapacity)
	2796	* @see getTerminatedBuffer()
	2797	* @stable ICU 2.0
	2798	*/
	2799	inline const UChar *getBuffer() const;
	2800
	2801	/**
	2802	* Get a read-only pointer to the internal buffer,
	2803	* making sure that it is NUL-terminated.
	2804	* This can be called at any time on a valid UnicodeString.
	2805	*
	2806	* It returns 0 if the string is bogus, or
	2807	* during an "open" getBuffer(minCapacity), or if the buffer cannot
	2808	* be NUL-terminated (because memory allocation failed).
	2809	*
	2810	* It can be called as many times as desired.
	2811	* The pointer that it returns will remain valid until the UnicodeString object is modified,
	2812	* at which time the pointer is semantically invalidated and must not be used any more.
	2813	*
	2814	* The capacity of the buffer can be determined with getCapacity().
	2815	* The part after length()+1 may or may not be initialized and valid,
	2816	* depending on the history of the UnicodeString object.
	2817	*
	2818	* The buffer contents is guaranteed to be NUL-terminated.
	2819	* getTerminatedBuffer() may reallocate the buffer if a terminating NUL
	2820	* is written.
	2821	* For this reason, this function is not const, unlike getBuffer().
	2822	* Note that a UnicodeString may also contain NUL characters as part of its contents.
	2823	*
	2824	* The buffer may reside in read-only memory. Its contents must not
	2825	* be modified.
	2826	*
	2827	* @return a read-only pointer to the internal string buffer,
	2828	* or 0 if the string is bogus, a getBuffer(minCapacity) is "open", or NUL-termination fails
	2829	*
	2830	* @see getBuffer(int32_t minCapacity)
	2831	* @see getBuffer()
	2832	* @stable ICU 2.2
	2833	*/
	2834	inline const UChar *getTerminatedBuffer();
	2835
	2836	//========================================
	2837	// Constructors
	2838	//========================================
	2839
	2840	/** Construct an empty UnicodeString.
	2841	* @stable ICU 2.0
	2842	*/
	2843	UnicodeString();
	2844
	2845	/**
	2846	* Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
	2847	* @param capacity the number of UChars this UnicodeString should hold
	2848	* before a resize is necessary; if count is greater than 0 and count
	2849	* code points c take up more space than capacity, then capacity is adjusted
	2850	* accordingly.
	2851	* @param c is used to initially fill the string
	2852	* @param count specifies how many code points c are to be written in the
	2853	* string
	2854	* @stable ICU 2.0
	2855	*/
	2856	UnicodeString(int32_t capacity, UChar32 c, int32_t count);
	2857
	2858	/**
	2859	* Single UChar (code unit) constructor.
	2860	*
	2861	* It is recommended to mark this constructor "explicit" by
	2862	* <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
	2863	* on the compiler command line or similar.
	2864	* @param ch the character to place in the UnicodeString
	2865	* @stable ICU 2.0
	2866	*/
	2867	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
	2868
	2869	/**
	2870	* Single UChar32 (code point) constructor.
	2871	*
	2872	* It is recommended to mark this constructor "explicit" by
	2873	* <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
	2874	* on the compiler command line or similar.
	2875	* @param ch the character to place in the UnicodeString
	2876	* @stable ICU 2.0
	2877	*/
	2878	UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
	2879
	2880	/**
	2881	* UChar* constructor.
	2882	*
	2883	* It is recommended to mark this constructor "explicit" by
	2884	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
	2885	* on the compiler command line or similar.
	2886	* @param text The characters to place in the UnicodeString. <TT>text</TT>
	2887	* must be NUL (U+0000) terminated.
	2888	* @stable ICU 2.0
	2889	*/
	2890	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
	2891
	2892	/**
	2893	* UChar* constructor.
	2894	* @param text The characters to place in the UnicodeString.
	2895	* @param textLength The number of Unicode characters in <TT>text</TT>
	2896	* to copy.
	2897	* @stable ICU 2.0
	2898	*/
	2899	UnicodeString(const UChar *text,
	2900	int32_t textLength);
	2901
	2902	/**
	2903	* Readonly-aliasing UChar* constructor.
	2904	* The text will be used for the UnicodeString object, but
	2905	* it will not be released when the UnicodeString is destroyed.
	2906	* This has copy-on-write semantics:
	2907	* When the string is modified, then the buffer is first copied into
	2908	* newly allocated memory.
	2909	* The aliased buffer is never modified.
	2910	* In an assignment to another UnicodeString, the text will be aliased again,
	2911	* so that both strings then alias the same readonly-text.
	2912	*
	2913	* @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
	2914	* This must be true if <code>textLength==-1</code>.
	2915	* @param text The characters to alias for the UnicodeString.
	2916	* @param textLength The number of Unicode characters in <code>text</code> to alias.
	2917	* If -1, then this constructor will determine the length
	2918	* by calling <code>u_strlen()</code>.
	2919	* @stable ICU 2.0
	2920	*/
	2921	UnicodeString(UBool isTerminated,
	2922	const UChar *text,
	2923	int32_t textLength);
	2924
	2925	/**
	2926	* Writable-aliasing UChar* constructor.
	2927	* The text will be used for the UnicodeString object, but
	2928	* it will not be released when the UnicodeString is destroyed.
	2929	* This has write-through semantics:
	2930	* For as long as the capacity of the buffer is sufficient, write operations
	2931	* will directly affect the buffer. When more capacity is necessary, then
	2932	* a new buffer will be allocated and the contents copied as with regularly
	2933	* constructed strings.
	2934	* In an assignment to another UnicodeString, the buffer will be copied.
	2935	* The extract(UChar *dst) function detects whether the dst pointer is the same
	2936	* as the string buffer itself and will in this case not copy the contents.
	2937	*
	2938	* @param buffer The characters to alias for the UnicodeString.
	2939	* @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
	2940	* @param buffCapacity The size of <code>buffer</code> in UChars.
	2941	* @stable ICU 2.0
	2942	*/
	2943	UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
	2944
	2945	#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
	2946
	2947	/**
	2948	* char* constructor.
	2949	* Uses the default converter (and thus depends on the ICU conversion code)
	2950	* unless U_CHARSET_IS_UTF8 is set to 1.
	2951	*
	2952	* For ASCII (really "invariant character") strings it is more efficient to use
	2953	* the constructor that takes a US_INV (for its enum EInvariant).
	2954	* For ASCII (invariant-character) string literals, see UNICODE_STRING and
	2955	* UNICODE_STRING_SIMPLE.
	2956	*
	2957	* It is recommended to mark this constructor "explicit" by
	2958	* <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
	2959	* on the compiler command line or similar.
	2960	* @param codepageData an array of bytes, null-terminated,
	2961	* in the platform's default codepage.
	2962	* @stable ICU 2.0
	2963	* @see UNICODE_STRING
	2964	* @see UNICODE_STRING_SIMPLE
	2965	*/
	2966	UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
	2967
	2968	/**
	2969	* char* constructor.
	2970	* Uses the default converter (and thus depends on the ICU conversion code)
	2971	* unless U_CHARSET_IS_UTF8 is set to 1.
	2972	* @param codepageData an array of bytes in the platform's default codepage.
	2973	* @param dataLength The number of bytes in <TT>codepageData</TT>.
	2974	* @stable ICU 2.0
	2975	*/
	2976	UnicodeString(const char *codepageData, int32_t dataLength);
	2977
	2978	#endif
	2979
	2980	#if !UCONFIG_NO_CONVERSION
	2981
	2982	/**
	2983	* char* constructor.
	2984	* @param codepageData an array of bytes, null-terminated
	2985	* @param codepage the encoding of <TT>codepageData</TT>. The special
	2986	* value 0 for <TT>codepage</TT> indicates that the text is in the
	2987	* platform's default codepage.
	2988	*
	2989	* If <code>codepage</code> is an empty string (<code>""</code>),
	2990	* then a simple conversion is performed on the codepage-invariant
	2991	* subset ("invariant characters") of the platform encoding. See utypes.h.
	2992	* Recommendation: For invariant-character strings use the constructor
	2993	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
	2994	* because it avoids object code dependencies of UnicodeString on
	2995	* the conversion code.
	2996	*
	2997	* @stable ICU 2.0
	2998	*/
	2999	UnicodeString(const char *codepageData, const char *codepage);
	3000
	3001	/**
	3002	* char* constructor.
	3003	* @param codepageData an array of bytes.
	3004	* @param dataLength The number of bytes in <TT>codepageData</TT>.
	3005	* @param codepage the encoding of <TT>codepageData</TT>. The special
	3006	* value 0 for <TT>codepage</TT> indicates that the text is in the
	3007	* platform's default codepage.
	3008	* If <code>codepage</code> is an empty string (<code>""</code>),
	3009	* then a simple conversion is performed on the codepage-invariant
	3010	* subset ("invariant characters") of the platform encoding. See utypes.h.
	3011	* Recommendation: For invariant-character strings use the constructor
	3012	* UnicodeString(const char *src, int32_t length, enum EInvariant inv)
	3013	* because it avoids object code dependencies of UnicodeString on
	3014	* the conversion code.
	3015	*
	3016	* @stable ICU 2.0
	3017	*/
	3018	UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
	3019
	3020	/**
	3021	* char * / UConverter constructor.
	3022	* This constructor uses an existing UConverter object to
	3023	* convert the codepage string to Unicode and construct a UnicodeString
	3024	* from that.
	3025	*
	3026	* The converter is reset at first.
	3027	* If the error code indicates a failure before this constructor is called,
	3028	* or if an error occurs during conversion or construction,
	3029	* then the string will be bogus.
	3030	*
	3031	* This function avoids the overhead of opening and closing a converter if
	3032	* multiple strings are constructed.
	3033	*
	3034	* @param src input codepage string
	3035	* @param srcLength length of the input string, can be -1 for NUL-terminated strings
	3036	* @param cnv converter object (ucnv_resetToUnicode() will be called),
	3037	* can be NULL for the default converter
	3038	* @param errorCode normal ICU error code
	3039	* @stable ICU 2.0
	3040	*/
	3041	UnicodeString(
	3042	const char *src, int32_t srcLength,
	3043	UConverter *cnv,
	3044	UErrorCode &errorCode);
	3045
	3046	#endif
	3047
	3048	/**
	3049	* Constructs a Unicode string from an invariant-character char * string.
	3050	* About invariant characters see utypes.h.
	3051	* This constructor has no runtime dependency on conversion code and is
	3052	* therefore recommended over ones taking a charset name string
	3053	* (where the empty string "" indicates invariant-character conversion).
	3054	*
	3055	* Use the macro US_INV as the third, signature-distinguishing parameter.
	3056	*
	3057	* For example:
	3058	* \code
	3059	* void fn(const char *s) {
	3060	* UnicodeString ustr(s, -1, US_INV);
	3061	* // use ustr ...
	3062	* }
	3063	* \endcode
	3064	*
	3065	* @param src String using only invariant characters.
	3066	* @param length Length of src, or -1 if NUL-terminated.
	3067	* @param inv Signature-distinguishing parameter, use US_INV.
	3068	*
	3069	* @see US_INV
	3070	* @stable ICU 3.2
	3071	*/
	3072	UnicodeString(const char *src, int32_t length, enum EInvariant inv);
	3073
	3074
	3075	/**
	3076	* Copy constructor.
	3077	* @param that The UnicodeString object to copy.
	3078	* @stable ICU 2.0
	3079	*/
	3080	UnicodeString(const UnicodeString& that);
	3081
	3082	/**
	3083	* 'Substring' constructor from tail of source string.
	3084	* @param src The UnicodeString object to copy.
	3085	* @param srcStart The offset into <tt>src</tt> at which to start copying.
	3086	* @stable ICU 2.2
	3087	*/
	3088	UnicodeString(const UnicodeString& src, int32_t srcStart);
	3089
	3090	/**
	3091	* 'Substring' constructor from subrange of source string.
	3092	* @param src The UnicodeString object to copy.
	3093	* @param srcStart The offset into <tt>src</tt> at which to start copying.
	3094	* @param srcLength The number of characters from <tt>src</tt> to copy.
	3095	* @stable ICU 2.2
	3096	*/
	3097	UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
	3098
	3099	/**
	3100	* Clone this object, an instance of a subclass of Replaceable.
	3101	* Clones can be used concurrently in multiple threads.
	3102	* If a subclass does not implement clone(), or if an error occurs,
	3103	* then NULL is returned.
	3104	* The clone functions in all subclasses return a pointer to a Replaceable
	3105	* because some compilers do not support covariant (same-as-this)
	3106	* return types; cast to the appropriate subclass if necessary.
	3107	* The caller must delete the clone.
	3108	*
	3109	* @return a clone of this object
	3110	*
	3111	* @see Replaceable::clone
	3112	* @see getDynamicClassID
	3113	* @stable ICU 2.6
	3114	*/
	3115	virtual Replaceable *clone() const;
	3116
	3117	/** Destructor.
	3118	* @stable ICU 2.0
	3119	*/
	3120	virtual ~UnicodeString();
	3121
	3122	/**
	3123	* Create a UnicodeString from a UTF-8 string.
	3124	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
	3125	* Calls u_strFromUTF8WithSub().
	3126	*
	3127	* @param utf8 UTF-8 input string.
	3128	* Note that a StringPiece can be implicitly constructed
	3129	* from a std::string or a NUL-terminated const char * string.
	3130	* @return A UnicodeString with equivalent UTF-16 contents.
	3131	* @see toUTF8
	3132	* @see toUTF8String
	3133	* @stable ICU 4.2
	3134	*/
	3135	static UnicodeString fromUTF8(const StringPiece &utf8);
	3136
	3137	/**
	3138	* Create a UnicodeString from a UTF-32 string.
	3139	* Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
	3140	* Calls u_strFromUTF32WithSub().
	3141	*
	3142	* @param utf32 UTF-32 input string. Must not be NULL.
	3143	* @param length Length of the input string, or -1 if NUL-terminated.
	3144	* @return A UnicodeString with equivalent UTF-16 contents.
	3145	* @see toUTF32
	3146	* @stable ICU 4.2
	3147	*/
	3148	static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
	3149
	3150	/* Miscellaneous operations */
	3151
	3152	/**
	3153	* Unescape a string of characters and return a string containing
	3154	* the result. The following escape sequences are recognized:
	3155	*
	3156	* \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
	3157	* \\Uhhhhhhhh 8 hex digits
	3158	* \\xhh 1-2 hex digits
	3159	* \\ooo 1-3 octal digits; o in [0-7]
	3160	* \\cX control-X; X is masked with 0x1F
	3161	*
	3162	* as well as the standard ANSI C escapes:
	3163	*
	3164	* \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
	3165	* \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
	3166	* \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
	3167	*
	3168	* Anything else following a backslash is generically escaped. For
	3169	* example, "[a\\-z]" returns "[a-z]".
	3170	*
	3171	* If an escape sequence is ill-formed, this method returns an empty
	3172	* string. An example of an ill-formed sequence is "\\u" followed by
	3173	* fewer than 4 hex digits.
	3174	*
	3175	* This function is similar to u_unescape() but not identical to it.
	3176	* The latter takes a source char*, so it does escape recognition
	3177	* and also invariant conversion.
	3178	*
	3179	* @return a string with backslash escapes interpreted, or an
	3180	* empty string on error.
	3181	* @see UnicodeString#unescapeAt()
	3182	* @see u_unescape()
	3183	* @see u_unescapeAt()
	3184	* @stable ICU 2.0
	3185	*/
	3186	UnicodeString unescape() const;
	3187
	3188	/**
	3189	* Unescape a single escape sequence and return the represented
	3190	* character. See unescape() for a listing of the recognized escape
	3191	* sequences. The character at offset-1 is assumed (without
	3192	* checking) to be a backslash. If the escape sequence is
	3193	* ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
	3194	* returned.
	3195	*
	3196	* @param offset an input output parameter. On input, it is the
	3197	* offset into this string where the escape sequence is located,
	3198	* after the initial backslash. On output, it is advanced after the
	3199	* last character parsed. On error, it is not advanced at all.
	3200	* @return the character represented by the escape sequence at
	3201	* offset, or (UChar32)0xFFFFFFFF on error.
	3202	* @see UnicodeString#unescape()
	3203	* @see u_unescape()
	3204	* @see u_unescapeAt()
	3205	* @stable ICU 2.0
	3206	*/
	3207	UChar32 unescapeAt(int32_t &offset) const;
	3208
	3209	/**
	3210	* ICU "poor man's RTTI", returns a UClassID for this class.
	3211	*
	3212	* @stable ICU 2.2
	3213	*/
	3214	static UClassID U_EXPORT2 getStaticClassID();
	3215
	3216	/**
	3217	* ICU "poor man's RTTI", returns a UClassID for the actual class.
	3218	*
	3219	* @stable ICU 2.2
	3220	*/
	3221	virtual UClassID getDynamicClassID() const;
	3222
	3223	//========================================
	3224	// Implementation methods
	3225	//========================================
	3226
	3227	protected:
	3228	/**
	3229	* Implement Replaceable::getLength() (see jitterbug 1027).
	3230	* @stable ICU 2.4
	3231	*/
	3232	virtual int32_t getLength() const;
	3233
	3234	/**
	3235	* The change in Replaceable to use virtual getCharAt() allows
	3236	* UnicodeString::charAt() to be inline again (see jitterbug 709).
	3237	* @stable ICU 2.4
	3238	*/
	3239	virtual UChar getCharAt(int32_t offset) const;
	3240
	3241	/**
	3242	* The change in Replaceable to use virtual getChar32At() allows
	3243	* UnicodeString::char32At() to be inline again (see jitterbug 709).
	3244	* @stable ICU 2.4
	3245	*/
	3246	virtual UChar32 getChar32At(int32_t offset) const;
	3247
	3248	private:
	3249	// For char* constructors. Could be made public.
	3250	UnicodeString &setToUTF8(const StringPiece &utf8);
	3251	// For extract(char*).
	3252	// We could make a toUTF8(target, capacity, errorCode) public but not
	3253	// this version: New API will be cleaner if we make callers create substrings
	3254	// rather than having start+length on every method,
	3255	// and it should take a UErrorCode&.
	3256	int32_t
	3257	toUTF8(int32_t start, int32_t len,
	3258	char *target, int32_t capacity) const;
	3259
	3260
	3261	inline int8_t
	3262	doCompare(int32_t start,
	3263	int32_t length,
	3264	const UnicodeString& srcText,
	3265	int32_t srcStart,
	3266	int32_t srcLength) const;
	3267
	3268	int8_t doCompare(int32_t start,
	3269	int32_t length,
	3270	const UChar *srcChars,
	3271	int32_t srcStart,
	3272	int32_t srcLength) const;
	3273
	3274	inline int8_t
	3275	doCompareCodePointOrder(int32_t start,
	3276	int32_t length,
	3277	const UnicodeString& srcText,
	3278	int32_t srcStart,
	3279	int32_t srcLength) const;
	3280
	3281	int8_t doCompareCodePointOrder(int32_t start,
	3282	int32_t length,
	3283	const UChar *srcChars,
	3284	int32_t srcStart,
	3285	int32_t srcLength) const;
	3286
	3287	inline int8_t
	3288	doCaseCompare(int32_t start,
	3289	int32_t length,
	3290	const UnicodeString &srcText,
	3291	int32_t srcStart,
	3292	int32_t srcLength,
	3293	uint32_t options) const;
	3294
	3295	int8_t
	3296	doCaseCompare(int32_t start,
	3297	int32_t length,
	3298	const UChar *srcChars,
	3299	int32_t srcStart,
	3300	int32_t srcLength,
	3301	uint32_t options) const;
	3302
	3303	int32_t doIndexOf(UChar c,
	3304	int32_t start,
	3305	int32_t length) const;
	3306
	3307	int32_t doIndexOf(UChar32 c,
	3308	int32_t start,
	3309	int32_t length) const;
	3310
	3311	int32_t doLastIndexOf(UChar c,
	3312	int32_t start,
	3313	int32_t length) const;
	3314
	3315	int32_t doLastIndexOf(UChar32 c,
	3316	int32_t start,
	3317	int32_t length) const;
	3318
	3319	void doExtract(int32_t start,
	3320	int32_t length,
	3321	UChar *dst,
	3322	int32_t dstStart) const;
	3323
	3324	inline void doExtract(int32_t start,
	3325	int32_t length,
	3326	UnicodeString& target) const;
	3327
	3328	inline UChar doCharAt(int32_t offset) const;
	3329
	3330	UnicodeString& doReplace(int32_t start,
	3331	int32_t length,
	3332	const UnicodeString& srcText,
	3333	int32_t srcStart,
	3334	int32_t srcLength);
	3335
	3336	UnicodeString& doReplace(int32_t start,
	3337	int32_t length,
	3338	const UChar *srcChars,
	3339	int32_t srcStart,
	3340	int32_t srcLength);
	3341
	3342	UnicodeString& doReverse(int32_t start,
	3343	int32_t length);
	3344
	3345	// calculate hash code
	3346	int32_t doHashCode(void) const;
	3347
	3348	// get pointer to start of array
	3349	// these do not check for kOpenGetBuffer, unlike the public getBuffer() function
	3350	inline UChar* getArrayStart(void);
	3351	inline const UChar* getArrayStart(void) const;
	3352
	3353	// A UnicodeString object (not necessarily its current buffer)
	3354	// is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
	3355	inline UBool isWritable() const;
	3356
	3357	// Is the current buffer writable?
	3358	inline UBool isBufferWritable() const;
	3359
	3360	// None of the following does releaseArray().
	3361	inline void setLength(int32_t len); // sets only fShortLength and fLength
	3362	inline void setToEmpty(); // sets fFlags=kShortString
	3363	inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
	3364
	3365	// allocate the array; result may be fStackBuffer
	3366	// sets refCount to 1 if appropriate
	3367	// sets fArray, fCapacity, and fFlags
	3368	// returns boolean for success or failure
	3369	UBool allocate(int32_t capacity);
	3370
	3371	// release the array if owned
	3372	void releaseArray(void);
	3373
	3374	// turn a bogus string into an empty one
	3375	void unBogus();
	3376
	3377	// implements assignment operator, copy constructor, and fastCopyFrom()
	3378	UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
	3379
	3380	// Pin start and limit to acceptable values.
	3381	inline void pinIndex(int32_t& start) const;
	3382	inline void pinIndices(int32_t& start,
	3383	int32_t& length) const;
	3384
	3385	#if !UCONFIG_NO_CONVERSION
	3386
	3387	/* Internal extract() using UConverter. */
	3388	int32_t doExtract(int32_t start, int32_t length,
	3389	char *dest, int32_t destCapacity,
	3390	UConverter *cnv,
	3391	UErrorCode &errorCode) const;
	3392
	3393	/*
	3394	* Real constructor for converting from codepage data.
	3395	* It assumes that it is called with !fRefCounted.
	3396	*
	3397	* If <code>codepage==0</code>, then the default converter
	3398	* is used for the platform encoding.
	3399	* If <code>codepage</code> is an empty string (<code>""</code>),
	3400	* then a simple conversion is performed on the codepage-invariant
	3401	* subset ("invariant characters") of the platform encoding. See utypes.h.
	3402	*/
	3403	void doCodepageCreate(const char *codepageData,
	3404	int32_t dataLength,
	3405	const char *codepage);
	3406
	3407	/*
	3408	* Worker function for creating a UnicodeString from
	3409	* a codepage string using a UConverter.
	3410	*/
	3411	void
	3412	doCodepageCreate(const char *codepageData,
	3413	int32_t dataLength,
	3414	UConverter *converter,
	3415	UErrorCode &status);
	3416
	3417	#endif
	3418
	3419	/*
	3420	* This function is called when write access to the array
	3421	* is necessary.
	3422	*
	3423	* We need to make a copy of the array if
	3424	* the buffer is read-only, or
	3425	* the buffer is refCounted (shared), and refCount>1, or
	3426	* the buffer is too small.
	3427	*
	3428	* Return FALSE if memory could not be allocated.
	3429	*/
	3430	UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
	3431	int32_t growCapacity = -1,
	3432	UBool doCopyArray = TRUE,
	3433	int32_t **pBufferToDelete = 0,
	3434	UBool forceClone = FALSE);
	3435
	3436	/**
	3437	* Common function for UnicodeString case mappings.
	3438	* The stringCaseMapper has the same type UStringCaseMapper
	3439	* as in ustr_imp.h for ustrcase_map().
	3440	*/
	3441	UnicodeString &
	3442	caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
	3443
	3444	// ref counting
	3445	void addRef(void);
	3446	int32_t removeRef(void);
	3447	int32_t refCount(void) const;
	3448
	3449	// constants
	3450	enum {
	3451	// Set the stack buffer size so that sizeof(UnicodeString) is,
	3452	// naturally (without padding), a multiple of sizeof(pointer).
	3453	US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
	3454	kInvalidUChar=0xffff, // invalid UChar index
	3455	kGrowSize=128, // grow size for this buffer
	3456	kInvalidHashCode=0, // invalid hash code
	3457	kEmptyHashCode=1, // hash code for empty string
	3458
	3459	// bit flag values for fFlags
	3460	kIsBogus=1, // this string is bogus, i.e., not valid or NULL
	3461	kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
	3462	kRefCounted=4, // there is a refCount field before the characters in fArray
	3463	kBufferIsReadonly=8,// do not write to this buffer
	3464	kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
	3465	// and releaseBuffer(newLength) must be called
	3466
	3467	// combined values for convenience
	3468	kShortString=kUsingStackBuffer,
	3469	kLongString=kRefCounted,
	3470	kReadonlyAlias=kBufferIsReadonly,
	3471	kWritableAlias=0
	3472	};
	3473
	3474	friend class StringThreadTest;
	3475	friend class UnicodeStringAppendable;
	3476
	3477	union StackBufferOrFields; // forward declaration necessary before friend declaration
	3478	friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
	3479
	3480	/*
	3481	* The following are all the class fields that are stored
	3482	* in each UnicodeString object.
	3483	* Note that UnicodeString has virtual functions,
	3484	* therefore there is an implicit vtable pointer
	3485	* as the first real field.
	3486	* The fields should be aligned such that no padding is necessary.
	3487	* On 32-bit machines, the size should be 32 bytes,
	3488	* on 64-bit machines (8-byte pointers), it should be 40 bytes.
	3489	*
	3490	* We use a hack to achieve this.
	3491	*
	3492	* With at least some compilers, each of the following is forced to
	3493	* a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
	3494	* rounded up with additional padding if the fields do not already fit that requirement:
	3495	* - sizeof(class UnicodeString)
	3496	* - offsetof(UnicodeString, fUnion)
	3497	* - sizeof(fUnion)
	3498	* - sizeof(fFields)
	3499	*
	3500	* In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
	3501	* which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
	3502	* (Padding at the end of fFields is ok:
	3503	* As long as there is no padding after fStackBuffer, it is not wasted space.)
	3504	*
	3505	* We further assume that the compiler does not reorder the fields,
	3506	* so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
	3507	* with at most some padding (but no other field) in between.
	3508	* (Padding there would be wasted space, but functionally harmless.)
	3509	*
	3510	* We use a few more sizeof(pointer)'s chunks of space with
	3511	* fRestOfStackBuffer, fShortLength and fFlags,
	3512	* to get up exactly to the intended sizeof(UnicodeString).
	3513	*/
	3514	// (implicit) *vtable;
	3515	union StackBufferOrFields {
	3516	// fStackBuffer is used iff (fFlags&kUsingStackBuffer)
	3517	// else fFields is used
	3518	UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer
	3519	struct {
	3520	UChar *fArray; // the Unicode data
	3521	int32_t fCapacity; // capacity of fArray (in UChars)
	3522	int32_t fLength; // number of characters in fArray if >127; else undefined
	3523	} fFields;
	3524	} fUnion;
	3525	UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
	3526	int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength
	3527	uint8_t fFlags; // bit flags: see constants above
	3528	};
	3529
	3530	/**
	3531	* Create a new UnicodeString with the concatenation of two others.
	3532	*
	3533	* @param s1 The first string to be copied to the new one.
	3534	* @param s2 The second string to be copied to the new one, after s1.
	3535	* @return UnicodeString(s1).append(s2)
	3536	* @stable ICU 2.8
	3537	*/
	3538	U_COMMON_API UnicodeString U_EXPORT2
	3539	operator+ (const UnicodeString &s1, const UnicodeString &s2);
	3540
	3541	//========================================
	3542	// Inline members
	3543	//========================================
	3544
	3545	//========================================
	3546	// Privates
	3547	//========================================
	3548
	3549	inline void
	3550	UnicodeString::pinIndex(int32_t& start) const
	3551	{
	3552	// Clamp start to the valid offset range [0, length()].
	3553	if(0 > start) {
	3554	start = 0; // negative offsets pin to the beginning
	3555	} else if(length() < start) {
	3556	start = length(); // offsets past the end pin to the end
	3557	}
	3558	}
	3559
	3560	inline void
	3561	UnicodeString::pinIndices(int32_t& start,
	3562	int32_t& _length) const
	3563	{
	3564	// Clamp start to [0, length()], then _length to [0, length()-start].
	3565	const int32_t total = length();
	3566	if(0 > start) {
	3567	start = 0;
	3568	} else if(total < start) {
	3569	start = total;
	3570	}
	3571	const int32_t avail = total - start; // code units left after the pinned start
	3572	if(0 > _length) { _length = 0; }
	3573	else if(avail < _length) {
	3574	_length = avail;
	3575	}
	3576	}
	3577
	3578	inline UChar*
	3579	UnicodeString::getArrayStart() // fStackBuffer iff kUsingStackBuffer is set, else fFields.fArray
	3580	{ return !(fFlags&kUsingStackBuffer) ? fUnion.fFields.fArray : fUnion.fStackBuffer; }
	3581
	3582	inline const UChar*
	3583	UnicodeString::getArrayStart() const // fStackBuffer iff kUsingStackBuffer is set, else fFields.fArray
	3584	{ return !(fFlags&kUsingStackBuffer) ? fUnion.fFields.fArray : fUnion.fStackBuffer; }
	3585
	3586	//========================================
	3587	// Read-only implementation methods
	3588	//========================================
	3589	inline int32_t
	3590	UnicodeString::length() const // a negative fShortLength means the length is in fFields.fLength
	3591	{ return fShortLength<0 ? fUnion.fFields.fLength : fShortLength; }
	3592
	3593	inline int32_t
	3594	UnicodeString::getCapacity() const // fixed US_STACKBUF_SIZE for the stack buffer, else fFields.fCapacity
	3595	{ return !(fFlags&kUsingStackBuffer) ? fUnion.fFields.fCapacity : US_STACKBUF_SIZE; }
	3596
	3597	inline int32_t
	3598	UnicodeString::hashCode() const
	3599	{ return doHashCode(); }
	3600
	3601	inline UBool
	3602	UnicodeString::isBogus() const // tests the kIsBogus bit (value 1) of fFlags
	3603	{ return (UBool)((fFlags & kIsBogus) != 0); }
	3604
	3605	inline UBool
	3606	UnicodeString::isWritable() const // writable unless bogus or a getBuffer(minCapacity) is open
	3607	{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
	3608
	3609	inline UBool
	3610	UnicodeString::isBufferWritable() const
	3611	{
	3612	return (UBool)(
	3613	!(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && // not open, bogus, or read-only
	3614	(!(fFlags&kRefCounted) || refCount()==1)); // and, if ref-counted, held by exactly one reference
	3615	}
	3616
	3617	inline const UChar *
	3618	UnicodeString::getBuffer() const {
	3619	if(fFlags&(kIsBogus|kOpenGetBuffer)) {
	3620	return 0; // no buffer while bogus or while a getBuffer(minCapacity) is open
	3621	} else if(fFlags&kUsingStackBuffer) {
	3622	return fUnion.fStackBuffer;
	3623	} else {
	3624	return fUnion.fFields.fArray;
	3625	}
	3626	}
	3627
	3628	//========================================
	3629	// Read-only alias methods
	3630	//========================================
	3631	inline int8_t
	3632	UnicodeString::doCompare(int32_t start,
	3633	int32_t thisLength,
	3634	const UnicodeString& srcText,
	3635	int32_t srcStart,
	3636	int32_t srcLength) const
	3637	{
	3638	// Bogus source: the result is 0 if this string is bogus as well, 1 otherwise.
	3639	if(srcText.isBogus()) { return (int8_t)!isBogus(); }
	3640	// Pin the source range, then defer to the UChar* overload.
	3641	srcText.pinIndices(srcStart, srcLength);
	3642	const UChar *srcChars = srcText.getArrayStart();
	3643	return doCompare(start, thisLength, srcChars, srcStart, srcLength);
	3644	}
	3645
	3646	inline UBool
	3647	UnicodeString::operator== (const UnicodeString& text) const
	3648	{
	3649	// Two bogus strings are equal; a bogus string never equals a valid one.
	3650	if(isBogus()) { return text.isBogus(); }
	3651	if(text.isBogus()) { return FALSE; }
	3652	// Check the lengths first: doCompare() then only has to run
	3653	// for strings with the same number of code units.
	3654	const int32_t len = length();
	3655	const int32_t textLength = text.length();
	3656	if(len != textLength) { return FALSE; }
	3657	return doCompare(0, len, text, 0, textLength) == 0;
	3658	}
	3659
	3660	inline UBool
	3661	UnicodeString::operator!= (const UnicodeString& text) const
	3662	{ return (! operator==(text)); }
	3663
	3664	inline UBool
	3665	UnicodeString::operator> (const UnicodeString& text) const
	3666	{ return doCompare(0, length(), text, 0, text.length()) == 1; }
	3667
	3668	inline UBool
	3669	UnicodeString::operator< (const UnicodeString& text) const
	3670	{ return doCompare(0, length(), text, 0, text.length()) == -1; }
	3671
	3672	inline UBool
	3673	UnicodeString::operator>= (const UnicodeString& text) const
	3674	{ return doCompare(0, length(), text, 0, text.length()) != -1; }
	3675
	3676	inline UBool
	3677	UnicodeString::operator<= (const UnicodeString& text) const
	3678	{ return doCompare(0, length(), text, 0, text.length()) != 1; }
	3679
	3680	inline int8_t
	3681	UnicodeString::compare(const UnicodeString& text) const
	3682	{ return doCompare(0, length(), text, 0, text.length()); }
	3683
	3684	inline int8_t
	3685	UnicodeString::compare(int32_t start,
	3686	int32_t _length,
	3687	const UnicodeString& srcText) const
	3688	{ return doCompare(start, _length, srcText, 0, srcText.length()); }
	3689
	3690	inline int8_t
	3691	UnicodeString::compare(const UChar *srcChars,
	3692	int32_t srcLength) const
	3693	{ return doCompare(0, length(), srcChars, 0, srcLength); }
	3694
	3695	inline int8_t
	3696	UnicodeString::compare(int32_t start,
	3697	int32_t _length,
	3698	const UnicodeString& srcText,
	3699	int32_t srcStart,
	3700	int32_t srcLength) const
	3701	{ return doCompare(start, _length, srcText, srcStart, srcLength); }
	3702
	3703	inline int8_t
	3704	UnicodeString::compare(int32_t start,
	3705	int32_t _length,
	3706	const UChar *srcChars) const
	3707	{ return doCompare(start, _length, srcChars, 0, _length); }
	3708
	3709	inline int8_t
	3710	UnicodeString::compare(int32_t start,
	3711	int32_t _length,
	3712	const UChar *srcChars,
	3713	int32_t srcStart,
	3714	int32_t srcLength) const
	3715	{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
	3716
	3717	inline int8_t
	3718	UnicodeString::compareBetween(int32_t start,
	3719	int32_t limit,
	3720	const UnicodeString& srcText,
	3721	int32_t srcStart,
	3722	int32_t srcLimit) const
	3723	{ return doCompare(start, limit - start,
	3724	srcText, srcStart, srcLimit - srcStart); }
	3725
	3726	inline int8_t
	3727	UnicodeString::doCompareCodePointOrder(int32_t start,
	3728	int32_t thisLength,
	3729	const UnicodeString& srcText,
	3730	int32_t srcStart,
	3731	int32_t srcLength) const
	3732	{
	3733	// Bogus source: the result is 0 if this string is bogus as well, 1 otherwise.
	3734	if(srcText.isBogus()) { return (int8_t)!isBogus(); }
	3735	// Pin the source range, then defer to the UChar* overload.
	3736	srcText.pinIndices(srcStart, srcLength);
	3737	const UChar *srcChars = srcText.getArrayStart();
	3738	return doCompareCodePointOrder(start, thisLength, srcChars, srcStart, srcLength);
	3739	}
	3740
	3741	inline int8_t
	3742	UnicodeString::compareCodePointOrder(const UnicodeString& text) const
	3743	{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
	3744
	3745	inline int8_t
	3746	UnicodeString::compareCodePointOrder(int32_t start,
	3747	int32_t _length,
	3748	const UnicodeString& srcText) const
	3749	{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
	3750
	3751	inline int8_t
	3752	UnicodeString::compareCodePointOrder(const UChar *srcChars,
	3753	int32_t srcLength) const
	3754	{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
	3755
	3756	inline int8_t
	3757	UnicodeString::compareCodePointOrder(int32_t start,
	3758	int32_t _length,
	3759	const UnicodeString& srcText,
	3760	int32_t srcStart,
	3761	int32_t srcLength) const
	3762	{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
	3763
	3764	inline int8_t
	3765	UnicodeString::compareCodePointOrder(int32_t start,
	3766	int32_t _length,
	3767	const UChar *srcChars) const
	3768	{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
	3769
	3770	inline int8_t
	3771	UnicodeString::compareCodePointOrder(int32_t start,
	3772	int32_t _length,
	3773	const UChar *srcChars,
	3774	int32_t srcStart,
	3775	int32_t srcLength) const
	3776	{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
	3777
	3778	inline int8_t
	3779	UnicodeString::compareCodePointOrderBetween(int32_t start,
	3780	int32_t limit,
	3781	const UnicodeString& srcText,
	3782	int32_t srcStart,
	3783	int32_t srcLimit) const
	3784	{ return doCompareCodePointOrder(start, limit - start,
	3785	srcText, srcStart, srcLimit - srcStart); }
	3786
	3787	inline int8_t
	3788	UnicodeString::doCaseCompare(int32_t start,
	3789	int32_t thisLength,
	3790	const UnicodeString &srcText,
	3791	int32_t srcStart,
	3792	int32_t srcLength,
	3793	uint32_t options) const
	3794	{
	3795	// Bogus source: the result is 0 if this string is bogus as well, 1 otherwise.
	3796	if(srcText.isBogus()) { return (int8_t)!isBogus(); }
	3797	// Pin the source range, then defer to the UChar* overload with the same options.
	3798	srcText.pinIndices(srcStart, srcLength);
	3799	const UChar *srcChars = srcText.getArrayStart();
	3800	return doCaseCompare(start, thisLength, srcChars, srcStart, srcLength, options);
	3801	}
	3802
	3803	inline int8_t
	3804	UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
	3805	return doCaseCompare(0, length(), text, 0, text.length(), options);
	3806	}
	3807
	3808	inline int8_t
	3809	UnicodeString::caseCompare(int32_t start,
	3810	int32_t _length,
	3811	const UnicodeString &srcText,
	3812	uint32_t options) const {
	3813	return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
	3814	}
	3815
	3816	inline int8_t
	3817	UnicodeString::caseCompare(const UChar *srcChars,
	3818	int32_t srcLength,
	3819	uint32_t options) const {
	3820	return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
	3821	}
	3822
	3823	inline int8_t
	3824	UnicodeString::caseCompare(int32_t start,
	3825	int32_t _length,
	3826	const UnicodeString &srcText,
	3827	int32_t srcStart,
	3828	int32_t srcLength,
	3829	uint32_t options) const {
	3830	return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
	3831	}
	3832
	3833	inline int8_t
	3834	UnicodeString::caseCompare(int32_t start,
	3835	int32_t _length,
	3836	const UChar *srcChars,
	3837	uint32_t options) const {
	3838	return doCaseCompare(start, _length, srcChars, 0, _length, options);
	3839	}
	3840
	3841	inline int8_t
	3842	UnicodeString::caseCompare(int32_t start,
	3843	int32_t _length,
	3844	const UChar *srcChars,
	3845	int32_t srcStart,
	3846	int32_t srcLength,
	3847	uint32_t options) const {
	3848	return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
	3849	}
	3850
	3851	inline int8_t
	3852	UnicodeString::caseCompareBetween(int32_t start,
	3853	int32_t limit,
	3854	const UnicodeString &srcText,
	3855	int32_t srcStart,
	3856	int32_t srcLimit,
	3857	uint32_t options) const {
	3858	return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
	3859	}
	3860
	3861	inline int32_t
	3862	UnicodeString::indexOf(const UnicodeString& srcText,
	3863	int32_t srcStart,
	3864	int32_t srcLength,
	3865	int32_t start,
	3866	int32_t _length) const
	3867	{
	3868	// A bogus pattern is reported as not found.
	3869	if(srcText.isBogus()) { return -1; }
	3870	srcText.pinIndices(srcStart, srcLength);
	3871	// After pinning, an empty pattern range is reported as not found too.
	3872	if(srcLength <= 0) { return -1; }
	3873	const UChar *srcChars = srcText.getArrayStart();
	3874	return indexOf(srcChars, srcStart, srcLength, start, _length);
	3875	}
	3876
	3877	inline int32_t
	3878	UnicodeString::indexOf(const UnicodeString& text) const
	3879	{ return indexOf(text, 0, text.length(), 0, length()); }
	3880
	3881	inline int32_t
	3882	UnicodeString::indexOf(const UnicodeString& text,
	3883	int32_t start) const {
	3884	pinIndex(start);
	3885	return indexOf(text, 0, text.length(), start, length() - start);
	3886	}
	3887
	3888	inline int32_t
	3889	UnicodeString::indexOf(const UnicodeString& text,
	3890	int32_t start,
	3891	int32_t _length) const
	3892	{ return indexOf(text, 0, text.length(), start, _length); }
	3893
	3894	inline int32_t
	3895	UnicodeString::indexOf(const UChar *srcChars,
	3896	int32_t srcLength,
	3897	int32_t start) const {
	3898	pinIndex(start);
	3899	return indexOf(srcChars, 0, srcLength, start, length() - start);
	3900	}
	3901
	3902	inline int32_t
	3903	UnicodeString::indexOf(const UChar *srcChars,
	3904	int32_t srcLength,
	3905	int32_t start,
	3906	int32_t _length) const
	3907	{ return indexOf(srcChars, 0, srcLength, start, _length); }
	3908
	3909	inline int32_t
	3910	UnicodeString::indexOf(UChar c,
	3911	int32_t start,
	3912	int32_t _length) const
	3913	{ return doIndexOf(c, start, _length); }
	3914
	3915	inline int32_t
	3916	UnicodeString::indexOf(UChar32 c,
	3917	int32_t start,
	3918	int32_t _length) const
	3919	{ return doIndexOf(c, start, _length); }
	3920
	3921	inline int32_t
	3922	UnicodeString::indexOf(UChar c) const
	3923	{ return doIndexOf(c, 0, length()); }
	3924
	3925	inline int32_t
	3926	UnicodeString::indexOf(UChar32 c) const
	3927	{ return indexOf(c, 0, length()); }
	3928
	3929	inline int32_t
	3930	UnicodeString::indexOf(UChar c,
	3931	int32_t start) const {
	3932	pinIndex(start);
	3933	return doIndexOf(c, start, length() - start);
	3934	}
	3935
	3936	inline int32_t
	3937	UnicodeString::indexOf(UChar32 c,
	3938	int32_t start) const {
	3939	pinIndex(start);
	3940	return indexOf(c, start, length() - start);
	3941	}
	3942
	3943	inline int32_t
	3944	UnicodeString::lastIndexOf(const UChar *srcChars,
	3945	int32_t srcLength,
	3946	int32_t start,
	3947	int32_t _length) const
	3948	{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
	3949
	3950	inline int32_t
	3951	UnicodeString::lastIndexOf(const UChar *srcChars,
	3952	int32_t srcLength,
	3953	int32_t start) const {
	3954	pinIndex(start);
	3955	return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
	3956	}
	3957
	3958	inline int32_t
	3959	UnicodeString::lastIndexOf(const UnicodeString& srcText,
	3960	int32_t srcStart,
	3961	int32_t srcLength,
	3962	int32_t start,
	3963	int32_t _length) const
	3964	{
	3965	// A bogus pattern is reported as not found.
	3966	if(srcText.isBogus()) { return -1; }
	3967	srcText.pinIndices(srcStart, srcLength);
	3968	// After pinning, an empty pattern range is reported as not found too.
	3969	if(srcLength <= 0) { return -1; }
	3970	const UChar *srcChars = srcText.getArrayStart();
	3971	return lastIndexOf(srcChars, srcStart, srcLength, start, _length);
	3972	}
	3973
	3974	inline int32_t
	3975	UnicodeString::lastIndexOf(const UnicodeString& text,
	3976	int32_t start,
	3977	int32_t _length) const
	3978	{ return lastIndexOf(text, 0, text.length(), start, _length); }
	3979
	3980	inline int32_t
	3981	UnicodeString::lastIndexOf(const UnicodeString& text,
	3982	int32_t start) const {
	3983	pinIndex(start);
	3984	return lastIndexOf(text, 0, text.length(), start, length() - start);
	3985	}
	3986
	3987	inline int32_t
	3988	UnicodeString::lastIndexOf(const UnicodeString& text) const
	3989	{ return lastIndexOf(text, 0, text.length(), 0, length()); }
	3990
	3991	inline int32_t
	3992	UnicodeString::lastIndexOf(UChar c,
	3993	int32_t start,
	3994	int32_t _length) const
	3995	{ return doLastIndexOf(c, start, _length); }
	3996
	3997	inline int32_t
	3998	UnicodeString::lastIndexOf(UChar32 c,
	3999	int32_t start,
	4000	int32_t _length) const {
	4001	return doLastIndexOf(c, start, _length);
	4002	}
	4003
	4004	inline int32_t
	4005	UnicodeString::lastIndexOf(UChar c) const
	4006	{ return doLastIndexOf(c, 0, length()); }
	4007
	4008	inline int32_t
	4009	UnicodeString::lastIndexOf(UChar32 c) const {
	4010	return lastIndexOf(c, 0, length());
	4011	}
	4012
	4013	inline int32_t
	4014	UnicodeString::lastIndexOf(UChar c,
	4015	int32_t start) const {
	4016	pinIndex(start);
	4017	return doLastIndexOf(c, start, length() - start);
	4018	}
	4019
	4020	inline int32_t
	4021	UnicodeString::lastIndexOf(UChar32 c,
	4022	int32_t start) const {
	4023	pinIndex(start);
	4024	return lastIndexOf(c, start, length() - start);
	4025	}
	4026
	4027	inline UBool
	4028	UnicodeString::startsWith(const UnicodeString& text) const
	4029	{ return compare(0, text.length(), text, 0, text.length()) == 0; }
	4030
	4031	inline UBool
	4032	UnicodeString::startsWith(const UnicodeString& srcText,
	4033	int32_t srcStart,
	4034	int32_t srcLength) const
	4035	{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
	4036
	4037	inline UBool
	4038	UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
	4039	if(srcLength < 0) {
	4040	srcLength = u_strlen(srcChars);
	4041	}
	4042	return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
	4043	}
	4044
	4045	inline UBool
	4046	UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
	4047	if(srcLength < 0) {
	4048	srcLength = u_strlen(srcChars);
	4049	}
	4050	return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
	4051	}
	4052
	4053	inline UBool
	4054	UnicodeString::endsWith(const UnicodeString& text) const
	4055	{ return doCompare(length() - text.length(), text.length(),
	4056	text, 0, text.length()) == 0; }
	4057
	4058	inline UBool
	4059	UnicodeString::endsWith(const UnicodeString& srcText,
	4060	int32_t srcStart,
	4061	int32_t srcLength) const {
	4062	srcText.pinIndices(srcStart, srcLength);
	4063	return doCompare(length() - srcLength, srcLength,
	4064	srcText, srcStart, srcLength) == 0;
	4065	}
	4066
	4067	inline UBool
	4068	UnicodeString::endsWith(const UChar *srcChars,
	4069	int32_t srcLength) const {
	4070	if(srcLength < 0) {
	4071	srcLength = u_strlen(srcChars);
	4072	}
	4073	return doCompare(length() - srcLength, srcLength,
	4074	srcChars, 0, srcLength) == 0;
	4075	}
	4076
	4077	inline UBool
	4078	UnicodeString::endsWith(const UChar *srcChars,
	4079	int32_t srcStart,
	4080	int32_t srcLength) const {
	4081	if(srcLength < 0) {
	4082	srcLength = u_strlen(srcChars + srcStart);
	4083	}
	4084	return doCompare(length() - srcLength, srcLength,
	4085	srcChars, srcStart, srcLength) == 0;
	4086	}
	4087
	4088	//========================================
	4089	// replace
	4090	//========================================
	4091	inline UnicodeString&
	4092	UnicodeString::replace(int32_t start,
	4093	int32_t _length,
	4094	const UnicodeString& srcText)
	4095	{ return doReplace(start, _length, srcText, 0, srcText.length()); }
	4096
	4097	inline UnicodeString&
	4098	UnicodeString::replace(int32_t start,
	4099	int32_t _length,
	4100	const UnicodeString& srcText,
	4101	int32_t srcStart,
	4102	int32_t srcLength)
	4103	{ return doReplace(start, _length, srcText, srcStart, srcLength); }
	4104
	4105	inline UnicodeString&
	4106	UnicodeString::replace(int32_t start,
	4107	int32_t _length,
	4108	const UChar *srcChars,
	4109	int32_t srcLength)
	4110	{ return doReplace(start, _length, srcChars, 0, srcLength); }
	4111
	4112	inline UnicodeString&
	4113	UnicodeString::replace(int32_t start,
	4114	int32_t _length,
	4115	const UChar *srcChars,
	4116	int32_t srcStart,
	4117	int32_t srcLength)
	4118	{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
	4119
	4120	inline UnicodeString&
	4121	UnicodeString::replace(int32_t start,
	4122	int32_t _length,
	4123	UChar srcChar)
	4124	{ return doReplace(start, _length, &srcChar, 0, 1); }
	4125
	4126	inline UnicodeString&
	4127	UnicodeString::replaceBetween(int32_t start,
	4128	int32_t limit,
	4129	const UnicodeString& srcText)
	4130	{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
	4131
	4132	inline UnicodeString&
	4133	UnicodeString::replaceBetween(int32_t start,
	4134	int32_t limit,
	4135	const UnicodeString& srcText,
	4136	int32_t srcStart,
	4137	int32_t srcLimit)
	4138	{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
	4139
	4140	inline UnicodeString&
	4141	UnicodeString::findAndReplace(const UnicodeString& oldText,
	4142	const UnicodeString& newText)
	4143	{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
	4144	newText, 0, newText.length()); }
	4145
	4146	inline UnicodeString&
	4147	UnicodeString::findAndReplace(int32_t start,
	4148	int32_t _length,
	4149	const UnicodeString& oldText,
	4150	const UnicodeString& newText)
	4151	{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
	4152	newText, 0, newText.length()); }
	4153
	4154	// ============================
	4155	// extract
	4156	// ============================
	4157	inline void
	4158	UnicodeString::doExtract(int32_t start,
	4159	int32_t _length,
	4160	UnicodeString& target) const
	4161	{ target.replace(0, target.length(), *this, start, _length); }
	4162
	4163	inline void
	4164	UnicodeString::extract(int32_t start,
	4165	int32_t _length,
	4166	UChar *target,
	4167	int32_t targetStart) const
	4168	{ doExtract(start, _length, target, targetStart); }
	4169
	4170	inline void
	4171	UnicodeString::extract(int32_t start,
	4172	int32_t _length,
	4173	UnicodeString& target) const
	4174	{ doExtract(start, _length, target); }
	4175
	4176	#if !UCONFIG_NO_CONVERSION
	4177
	4178	inline int32_t
	4179	UnicodeString::extract(int32_t start,
	4180	int32_t _length,
	4181	char *dst,
	4182	const char *codepage) const
	4183
	4184	{
	4185	// This dstSize value will be checked explicitly
	4186	return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
	4187	}
	4188
	4189	#endif
	4190
	4191	inline void
	4192	UnicodeString::extractBetween(int32_t start,
	4193	int32_t limit,
	4194	UChar *dst,
	4195	int32_t dstStart) const {
	4196	pinIndex(start);
	4197	pinIndex(limit);
	4198	doExtract(start, limit - start, dst, dstStart);
	4199	}
	4200
	4201	inline UnicodeString
	4202	UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
	4203	return tempSubString(start, limit - start);
	4204	}
	4205
	4206	inline UChar
	4207	UnicodeString::doCharAt(int32_t offset) const
	4208	{
	4209	if((uint32_t)offset < (uint32_t)length()) {
	4210	return getArrayStart()[offset];
	4211	} else {
	4212	return kInvalidUChar;
	4213	}
	4214	}
	4215
	4216	inline UChar
	4217	UnicodeString::charAt(int32_t offset) const
	4218	{ return doCharAt(offset); }
	4219
	4220	inline UChar
	4221	UnicodeString::operator[] (int32_t offset) const
	4222	{ return doCharAt(offset); }
	4223
	4224	inline UBool
	4225	UnicodeString::isEmpty() const {
	4226	return fShortLength == 0;
	4227	}
	4228
	4229	//========================================
	4230	// Write implementation methods
	4231	//========================================
	4232	inline void
	4233	UnicodeString::setLength(int32_t len) {
	4234	if(len <= 127) {
	4235	fShortLength = (int8_t)len;
	4236	} else {
	4237	fShortLength = (int8_t)-1;
	4238	fUnion.fFields.fLength = len;
	4239	}
	4240	}
	4241
	4242	inline void
	4243	UnicodeString::setToEmpty() {
	4244	fShortLength = 0;
	4245	fFlags = kShortString;
	4246	}
	4247
	4248	inline void
	4249	UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
	4250	setLength(len);
	4251	fUnion.fFields.fArray = array;
	4252	fUnion.fFields.fCapacity = capacity;
	4253	}
	4254
	4255	inline const UChar *
	4256	UnicodeString::getTerminatedBuffer() {
	4257	if(!isWritable()) {
	4258	return 0;
	4259	} else {
	4260	UChar *array = getArrayStart();
	4261	int32_t len = length();
	4262	if(len < getCapacity() && ((fFlags&kRefCounted) == 0 \|\| refCount() == 1)) {
	4263	/*
	4264	* kRefCounted: Do not write the NUL if the buffer is shared.
	4265	* That is mostly safe, except when the length of one copy was modified
	4266	* without copy-on-write, e.g., via truncate(newLength) or remove(void).
	4267	* Then the NUL would be written into the middle of another copy's string.
	4268	*/
	4269	if(!(fFlags&kBufferIsReadonly)) {
	4270	/*
	4271	* We must not write to a readonly buffer, but it is known to be
	4272	* NUL-terminated if len<capacity.
	4273	* A shared, allocated buffer (refCount()>1) must not have its contents
	4274	* modified, but the NUL at [len] is beyond the string contents,
	4275	* and multiple string objects and threads writing the same NUL into the
	4276	* same location is harmless.
	4277	* In all other cases, the buffer is fully writable and it is anyway safe
	4278	* to write the NUL.
	4279	*
	4280	* Note: An earlier version of this code tested whether there is a NUL
	4281	* at [len] already, but, while safe, it generated lots of warnings from
	4282	* tools like valgrind and Purify.
	4283	*/
	4284	array[len] = 0;
	4285	}
	4286	return array;
	4287	} else if(cloneArrayIfNeeded(len+1)) {
	4288	array = getArrayStart();
	4289	array[len] = 0;
	4290	return array;
	4291	} else {
	4292	return 0;
	4293	}
	4294	}
	4295	}
	4296
	4297	inline UnicodeString&
	4298	UnicodeString::operator= (UChar ch)
	4299	{ return doReplace(0, length(), &ch, 0, 1); }
	4300
	4301	inline UnicodeString&
	4302	UnicodeString::operator= (UChar32 ch)
	4303	{ return replace(0, length(), ch); }
	4304
	4305	inline UnicodeString&
	4306	UnicodeString::setTo(const UnicodeString& srcText,
	4307	int32_t srcStart,
	4308	int32_t srcLength)
	4309	{
	4310	unBogus();
	4311	return doReplace(0, length(), srcText, srcStart, srcLength);
	4312	}
	4313
	4314	inline UnicodeString&
	4315	UnicodeString::setTo(const UnicodeString& srcText,
	4316	int32_t srcStart)
	4317	{
	4318	unBogus();
	4319	srcText.pinIndex(srcStart);
	4320	return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
	4321	}
	4322
	4323	inline UnicodeString&
	4324	UnicodeString::setTo(const UnicodeString& srcText)
	4325	{
	4326	return copyFrom(srcText);
	4327	}
	4328
	4329	inline UnicodeString&
	4330	UnicodeString::setTo(const UChar *srcChars,
	4331	int32_t srcLength)
	4332	{
	4333	unBogus();
	4334	return doReplace(0, length(), srcChars, 0, srcLength);
	4335	}
	4336
	4337	inline UnicodeString&
	4338	UnicodeString::setTo(UChar srcChar)
	4339	{
	4340	unBogus();
	4341	return doReplace(0, length(), &srcChar, 0, 1);
	4342	}
	4343
	4344	inline UnicodeString&
	4345	UnicodeString::setTo(UChar32 srcChar)
	4346	{
	4347	unBogus();
	4348	return replace(0, length(), srcChar);
	4349	}
	4350
	4351	inline UnicodeString&
	4352	UnicodeString::append(const UnicodeString& srcText,
	4353	int32_t srcStart,
	4354	int32_t srcLength)
	4355	{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
	4356
	4357	inline UnicodeString&
	4358	UnicodeString::append(const UnicodeString& srcText)
	4359	{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
	4360
	4361	inline UnicodeString&
	4362	UnicodeString::append(const UChar *srcChars,
	4363	int32_t srcStart,
	4364	int32_t srcLength)
	4365	{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
	4366
	4367	inline UnicodeString&
	4368	UnicodeString::append(const UChar *srcChars,
	4369	int32_t srcLength)
	4370	{ return doReplace(length(), 0, srcChars, 0, srcLength); }
	4371
	4372	inline UnicodeString&
	4373	UnicodeString::append(UChar srcChar)
	4374	{ return doReplace(length(), 0, &srcChar, 0, 1); }
	4375
	4376	inline UnicodeString&
	4377	UnicodeString::operator+= (UChar ch)
	4378	{ return doReplace(length(), 0, &ch, 0, 1); }
	4379
	4380	inline UnicodeString&
	4381	UnicodeString::operator+= (UChar32 ch) {
	4382	return append(ch);
	4383	}
	4384
	4385	inline UnicodeString&
	4386	UnicodeString::operator+= (const UnicodeString& srcText)
	4387	{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
	4388
	4389	inline UnicodeString&
	4390	UnicodeString::insert(int32_t start,
	4391	const UnicodeString& srcText,
	4392	int32_t srcStart,
	4393	int32_t srcLength)
	4394	{ return doReplace(start, 0, srcText, srcStart, srcLength); }
	4395
	4396	inline UnicodeString&
	4397	UnicodeString::insert(int32_t start,
	4398	const UnicodeString& srcText)
	4399	{ return doReplace(start, 0, srcText, 0, srcText.length()); }
	4400
	4401	inline UnicodeString&
	4402	UnicodeString::insert(int32_t start,
	4403	const UChar *srcChars,
	4404	int32_t srcStart,
	4405	int32_t srcLength)
	4406	{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
	4407
	4408	inline UnicodeString&
	4409	UnicodeString::insert(int32_t start,
	4410	const UChar *srcChars,
	4411	int32_t srcLength)
	4412	{ return doReplace(start, 0, srcChars, 0, srcLength); }
	4413
	4414	inline UnicodeString&
	4415	UnicodeString::insert(int32_t start,
	4416	UChar srcChar)
	4417	{ return doReplace(start, 0, &srcChar, 0, 1); }
	4418
	4419	inline UnicodeString&
	4420	UnicodeString::insert(int32_t start,
	4421	UChar32 srcChar)
	4422	{ return replace(start, 0, srcChar); }
	4423
	4424
	4425	inline UnicodeString&
	4426	UnicodeString::remove()
	4427	{
	4428	// remove() of a bogus string makes the string empty and non-bogus
	4429	// we also un-alias a read-only alias to deal with NUL-termination
	4430	// issues with getTerminatedBuffer()
	4431	if(fFlags & (kIsBogus\|kBufferIsReadonly)) {
	4432	setToEmpty();
	4433	} else {
	4434	fShortLength = 0;
	4435	}
	4436	return *this;
	4437	}
	4438
	4439	inline UnicodeString&
	4440	UnicodeString::remove(int32_t start,
	4441	int32_t _length)
	4442	{
	4443	if(start <= 0 && _length == INT32_MAX) {
	4444	// remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
	4445	return remove();
	4446	}
	4447	return doReplace(start, _length, NULL, 0, 0);
	4448	}
	4449
	4450	inline UnicodeString&
	4451	UnicodeString::removeBetween(int32_t start,
	4452	int32_t limit)
	4453	{ return doReplace(start, limit - start, NULL, 0, 0); }
	4454
	4455	inline UnicodeString &
	4456	UnicodeString::retainBetween(int32_t start, int32_t limit) {
	4457	truncate(limit);
	4458	return doReplace(0, start, NULL, 0, 0);
	4459	}
	4460
	4461	inline UBool
	4462	UnicodeString::truncate(int32_t targetLength)
	4463	{
	4464	if(isBogus() && targetLength == 0) {
	4465	// truncate(0) of a bogus string makes the string empty and non-bogus
	4466	unBogus();
	4467	return FALSE;
	4468	} else if((uint32_t)targetLength < (uint32_t)length()) {
	4469	setLength(targetLength);
	4470	if(fFlags&kBufferIsReadonly) {
	4471	fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more
	4472	}
	4473	return TRUE;
	4474	} else {
	4475	return FALSE;
	4476	}
	4477	}
	4478
	4479	inline UnicodeString&
	4480	UnicodeString::reverse()
	4481	{ return doReverse(0, length()); }
	4482
	4483	inline UnicodeString&
	4484	UnicodeString::reverse(int32_t start,
	4485	int32_t _length)
	4486	{ return doReverse(start, _length); }
	4487
	4488	U_NAMESPACE_END
	4489
	4490	#endif